diff --git a/engine/docker/dev-host/grafana/dashboards/api.json b/engine/docker/dev-host/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-host/grafana/dashboards/api.json +++ b/engine/docker/dev-host/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-host/grafana/dashboards/cache.json b/engine/docker/dev-host/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-host/grafana/dashboards/cache.json +++ b/engine/docker/dev-host/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-host/grafana/dashboards/futures.json b/engine/docker/dev-host/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-host/grafana/dashboards/futures.json +++ b/engine/docker/dev-host/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-host/grafana/dashboards/gasoline.json b/engine/docker/dev-host/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-host/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-host/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-host/grafana/dashboards/guard.json b/engine/docker/dev-host/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-host/grafana/dashboards/guard.json +++ b/engine/docker/dev-host/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-host/grafana/grafana.ini b/engine/docker/dev-host/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-host/grafana/grafana.ini +++ b/engine/docker/dev-host/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-host/otel-collector-server/config.yaml b/engine/docker/dev-host/otel-collector-server/config.yaml index 15f12073f0..c4414738d1 100644 --- a/engine/docker/dev-host/otel-collector-server/config.yaml +++ b/engine/docker/dev-host/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini b/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini +++ b/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml index a74179019e..64004c2dc7 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-a + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml index a74179019e..7dae8a71d0 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-b + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml index a74179019e..a4fd830662 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-c + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/docker-compose.yml b/engine/docker/dev-multidc-multinode/docker-compose.yml index f5201df8f2..7fbbc3f111 100644 --- a/engine/docker/dev-multidc-multinode/docker-compose.yml +++ b/engine/docker/dev-multidc-multinode/docker-compose.yml @@ -137,6 +137,8 @@ services: networks: - rivet-network-dc-a - rivet-network-dc-a-to-core + ports: + - '4317:4317' otel-collector-client-dc-a: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -404,6 +406,8 @@ services: networks: - rivet-network-dc-b - rivet-network-dc-b-to-core + ports: + - '4317:4317' otel-collector-client-dc-b: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -667,6 +671,8 @@ services: networks: - rivet-network-dc-c - rivet-network-dc-c-to-core + ports: + - '4317:4317' otel-collector-client-dc-c: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/api.json b/engine/docker/dev-multidc/core/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/grafana.ini b/engine/docker/dev-multidc/core/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multidc/core/grafana/grafana.ini +++ b/engine/docker/dev-multidc/core/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml index a74179019e..64004c2dc7 100644 --- a/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-a + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml index a74179019e..7dae8a71d0 100644 --- a/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-b + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml index a74179019e..a4fd830662 100644 --- a/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-c + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/docker-compose.yml b/engine/docker/dev-multidc/docker-compose.yml index 96c1878833..e71dfa07f2 100644 --- a/engine/docker/dev-multidc/docker-compose.yml +++ b/engine/docker/dev-multidc/docker-compose.yml @@ -136,6 +136,8 @@ services: networks: - rivet-network-dc-a - rivet-network-dc-a-to-core + ports: + - '4317:4317' otel-collector-client-dc-a: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -289,6 +291,8 @@ services: networks: - rivet-network-dc-b - rivet-network-dc-b-to-core + ports: + - '4317:4317' otel-collector-client-dc-b: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -438,6 +442,8 @@ services: networks: - rivet-network-dc-c - rivet-network-dc-c-to-core + ports: + - '4317:4317' otel-collector-client-dc-c: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multinode/docker-compose.yml b/engine/docker/dev-multinode/docker-compose.yml index fb05f523db..7deffc9227 100644 --- a/engine/docker/dev-multinode/docker-compose.yml +++ b/engine/docker/dev-multinode/docker-compose.yml @@ -130,6 +130,8 @@ services: networks: - rivet-network - rivet-network-to-core + ports: + - '4317:4317' otel-collector-client: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multinode/grafana/dashboards/api.json b/engine/docker/dev-multinode/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/api.json +++ b/engine/docker/dev-multinode/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multinode/grafana/dashboards/cache.json b/engine/docker/dev-multinode/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/cache.json +++ b/engine/docker/dev-multinode/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multinode/grafana/dashboards/futures.json b/engine/docker/dev-multinode/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/futures.json +++ b/engine/docker/dev-multinode/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multinode/grafana/dashboards/guard.json b/engine/docker/dev-multinode/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/guard.json +++ b/engine/docker/dev-multinode/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/grafana.ini b/engine/docker/dev-multinode/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multinode/grafana/grafana.ini +++ b/engine/docker/dev-multinode/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multinode/otel-collector-server/config.yaml b/engine/docker/dev-multinode/otel-collector-server/config.yaml index a74179019e..7b5fc80c72 100644 --- a/engine/docker/dev-multinode/otel-collector-server/config.yaml +++ b/engine/docker/dev-multinode/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev/grafana/dashboards/api.json b/engine/docker/dev/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev/grafana/dashboards/api.json +++ b/engine/docker/dev/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev/grafana/dashboards/cache.json b/engine/docker/dev/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev/grafana/dashboards/cache.json +++ b/engine/docker/dev/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev/grafana/dashboards/futures.json b/engine/docker/dev/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev/grafana/dashboards/futures.json +++ b/engine/docker/dev/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev/grafana/dashboards/gasoline.json b/engine/docker/dev/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev/grafana/dashboards/gasoline.json +++ b/engine/docker/dev/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev/grafana/dashboards/guard.json b/engine/docker/dev/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev/grafana/dashboards/guard.json +++ b/engine/docker/dev/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev/grafana/grafana.ini b/engine/docker/dev/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev/grafana/grafana.ini +++ b/engine/docker/dev/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev/otel-collector-server/config.yaml b/engine/docker/dev/otel-collector-server/config.yaml index a74179019e..7b5fc80c72 100644 --- a/engine/docker/dev/otel-collector-server/config.yaml +++ b/engine/docker/dev/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/template/grafana-dashboards/api.json b/engine/docker/template/grafana-dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/template/grafana-dashboards/api.json +++ b/engine/docker/template/grafana-dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/template/grafana-dashboards/cache.json b/engine/docker/template/grafana-dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/template/grafana-dashboards/cache.json +++ b/engine/docker/template/grafana-dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/template/grafana-dashboards/futures.json b/engine/docker/template/grafana-dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/template/grafana-dashboards/futures.json +++ b/engine/docker/template/grafana-dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/template/grafana-dashboards/gasoline.json b/engine/docker/template/grafana-dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/template/grafana-dashboards/gasoline.json +++ b/engine/docker/template/grafana-dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/template/grafana-dashboards/guard.json b/engine/docker/template/grafana-dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/template/grafana-dashboards/guard.json +++ b/engine/docker/template/grafana-dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/template/src/docker-compose.ts b/engine/docker/template/src/docker-compose.ts index 7507ec74bd..5daa3b2182 100644 --- a/engine/docker/template/src/docker-compose.ts +++ b/engine/docker/template/src/docker-compose.ts @@ -238,6 +238,7 @@ export function generateDockerCompose(context: TemplateContext) { clickhouse: { condition: "service_healthy" }, }, networks: [dcNetworkName, dcToCoreNetworkName], + ports: ["4317:4317"], }; services[otelCollectorClientServiceName] = { diff --git a/engine/docker/template/src/services/core/grafana.ts b/engine/docker/template/src/services/core/grafana.ts index d5e3ad2fa0..813e44ffba 100644 --- a/engine/docker/template/src/services/core/grafana.ts +++ b/engine/docker/template/src/services/core/grafana.ts @@ -17,7 +17,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/template/src/services/edge/otel-collector-server.ts b/engine/docker/template/src/services/edge/otel-collector-server.ts index 83d2afc1b6..5b57dc4e7c 100644 --- a/engine/docker/template/src/services/edge/otel-collector-server.ts +++ b/engine/docker/template/src/services/edge/otel-collector-server.ts @@ -3,7 +3,7 @@ import type { TemplateContext } from "../../context"; export function generateDatacenterOtelCollectorServer( context: TemplateContext, - dcId: string, + dcName: string, ) { const clickhouseHost = context.config.networkMode === "host" ? "127.0.0.1" : "clickhouse"; @@ -18,6 +18,20 @@ export function generateDatacenterOtelCollectorServer( }, }, processors: { + resource: { + attributes: [ + { + key: "rivet.project", + value: "dev", + action: "upsert", + }, + { + key: "rivet.datacenter", + value: dcName, + action: "upsert", + }, + ], + }, batch: { timeout: "5s", send_batch_size: 10000, @@ -65,17 +79,17 @@ export function generateDatacenterOtelCollectorServer( pipelines: { logs: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, traces: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, metrics: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, }, @@ -86,7 +100,7 @@ export function generateDatacenterOtelCollectorServer( context.writeDatacenterServiceFile( "otel-collector-server", - dcId, + dcName, "config.yaml", yamlContent, ); diff --git a/engine/packages/api-builder/src/middleware.rs b/engine/packages/api-builder/src/middleware.rs index ba8f4e16fe..67168cd2cc 100644 --- a/engine/packages/api-builder/src/middleware.rs +++ b/engine/packages/api-builder/src/middleware.rs @@ -195,9 +195,11 @@ pub async fn http_logging_middleware( ); let error_code: String = if status.is_success() { - "".into() + String::new() + } else if let Some(err) = &error { + format!("{}.{}", err.group, err.code) } else { - status.to_string() + String::new() }; metrics::API_REQUEST_DURATION.record( duration, diff --git a/engine/packages/gasoline/src/ctx/workflow.rs b/engine/packages/gasoline/src/ctx/workflow.rs index 8987624f31..086104c34c 100644 --- a/engine/packages/gasoline/src/ctx/workflow.rs +++ b/engine/packages/gasoline/src/ctx/workflow.rs @@ -223,7 +223,7 @@ impl WorkflowCtx { 1, &[ KeyValue::new("workflow_name", self.name.clone()), - KeyValue::new("error_code", err.to_string()), + KeyValue::new("error", err.to_string()), ], ); } diff --git a/engine/packages/gasoline/src/metrics.rs b/engine/packages/gasoline/src/metrics.rs index 206089e619..71afdd524b 100644 --- a/engine/packages/gasoline/src/metrics.rs +++ b/engine/packages/gasoline/src/metrics.rs @@ -51,7 +51,7 @@ lazy_static::lazy_static! { pub static ref WORKFLOW_ACTIVE: Gauge = METER.u64_gauge("rivet_gasoline_workflow_active") .with_description("Total active workflows.") .build(); - /// Expected attributes: "workflow_name", "error_code" + /// Expected attributes: "workflow_name", "error" pub static ref WORKFLOW_DEAD: Gauge = METER.u64_gauge("rivet_gasoline_workflow_dead") .with_description("Total dead workflows.") .build(); @@ -59,7 +59,7 @@ lazy_static::lazy_static! { pub static ref WORKFLOW_SLEEPING: Gauge = METER.u64_gauge("rivet_gasoline_workflow_sleeping") .with_description("Total sleeping workflows.") .build(); - /// Expected attributes: "workflow_name", "error_code" + /// Expected attributes: "workflow_name", "error" pub static ref WORKFLOW_ERRORS: Counter = METER.u64_counter("rivet_gasoline_workflow_errors") .with_description("All errors made in a workflow.") .build(); diff --git a/engine/packages/metrics/src/buckets.rs b/engine/packages/metrics/src/buckets.rs index 56bb53c950..b719898714 100644 --- a/engine/packages/metrics/src/buckets.rs +++ b/engine/packages/metrics/src/buckets.rs @@ -1,22 +1,22 @@ pub const BUCKETS: &[f64] = &[ - // Added + // For otel + 0.0, // Added 0.001, 0.0025, // Copied from https://docs.rs/prometheus/latest/src/prometheus/histogram.rs.html#25-27 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, // Added 25.0, 50.0, 100.0, 250.0, 500.0, ]; -pub const PROVISION_BUCKETS: &[f64] = &[ - 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 35.0, 50.0, 75.0, 100.0, 125.0, 250.0, 500.0, 1000.0, -]; - pub const MICRO_BUCKETS: &[f64] = &[ - 0.0001, 0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.5, 1.0, 2.5, - 5.0, 10.0, 25.0, 50.0, + // For otel + 0.0, 0.0001, 0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.5, 1.0, + 2.5, 5.0, 10.0, 25.0, 50.0, ]; // Calculated based on the LogHistogram configuration in `packages/common/runtime/src/lib.rs` pub const TASK_POLL_BUCKETS: &[f64] = &[ + // For otel + 0.0, 0.00002, 0.000032768, 0.000065536, diff --git a/engine/packages/metrics/src/providers.rs b/engine/packages/metrics/src/providers.rs index 72771180ee..d54da249dc 100644 --- a/engine/packages/metrics/src/providers.rs +++ b/engine/packages/metrics/src/providers.rs @@ -119,7 +119,7 @@ fn init_tracer_provider() -> SdkTracerProvider { fn init_meter_provider() -> SdkMeterProvider { let exporter = opentelemetry_otlp::MetricExporter::builder() .with_tonic() - .with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Cumulative) .with_protocol(opentelemetry_otlp::Protocol::Grpc) .with_endpoint(otel_grpc_endpoint()) .build() diff --git a/engine/packages/pegboard/src/metrics.rs b/engine/packages/pegboard/src/metrics.rs index b529b8a0ac..30324e7d31 100644 --- a/engine/packages/pegboard/src/metrics.rs +++ b/engine/packages/pegboard/src/metrics.rs @@ -1,44 +1,14 @@ use rivet_metrics::{ - BUCKETS, MICRO_BUCKETS, + MICRO_BUCKETS, otel::{global::*, metrics::*}, }; lazy_static::lazy_static! { static ref METER: Meter = meter("rivet-pegboard"); - /// Expected attributes: "client_id", "index" - pub static ref CLIENT_DUPLICATE_EVENT: Counter = METER.u64_counter("rivet_pegboard_client_duplicate_event") - .with_description("Duplicate client event that was attempted to be inserted.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_CPU_TOTAL: Gauge = METER.f64_gauge("rivet_pegboard_client_cpu_total") - .with_description("Total millicores of cpu available on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_MEMORY_TOTAL: Gauge = METER.f64_gauge("rivet_pegboard_client_memory_total") - .with_description("Total MiB of memory available on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_CPU_ALLOCATED: Gauge = METER.f64_gauge("rivet_pegboard_client_cpu_allocated") - .with_description("Total millicores of cpu allocated on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_MEMORY_ALLOCATED: Gauge = METER.f64_gauge("rivet_pegboard_client_memory_allocated") - .with_description("Total MiB of memory allocated on a client.") - .build(); - - /// Has no expected attributes - pub static ref ACTOR_CPU_PENDING_ALLOCATION: Gauge = METER.f64_gauge("rivet_pegboard_actor_cpu_pending_allocation") - .with_description("Total actor cpu waiting for availability.") - .build(); - - /// Has no expected attributes - pub static ref ACTOR_MEMORY_PENDING_ALLOCATION: Gauge = METER.f64_gauge("rivet_pegboard_actor_memory_pending_allocation") - .with_description("Total actor memory waiting for availability.") + /// Expected attributes: "namespace_id", "runner_name" + pub static ref ACTOR_PENDING_ALLOCATION: Gauge = METER.f64_gauge("rivet_pegboard_actor_pending_allocation") + .with_description("Total actors waiting for availability.") .build(); /// Expected attributes: "did_reserve" @@ -46,20 +16,4 @@ lazy_static::lazy_static! { .with_description("Total duration to reserve resources for an actor.") .with_boundaries(MICRO_BUCKETS.to_vec()) .build(); - - /// Has no expected attributes - pub static ref ACTOR_START_DURATION: Histogram = METER.f64_histogram("rivet_pegboard_actor_start_duration") - .with_description("Total duration from actor creation to starting state.") - .with_boundaries(BUCKETS.to_vec()) - .build(); - - /// Expected attributes: "env_id", "flavor" - pub static ref ENV_CPU_USAGE: Gauge = METER.f64_gauge("rivet_pegboard_env_cpu_usage") - .with_description("Total millicores used by an environment.") - .build(); - - /// Expected attributes: "env_id", "flavor" - pub static ref ENV_MEMORY_USAGE: Gauge = METER.f64_gauge("rivet_pegboard_env_memory_usage") - .with_description("Total MiB of memory used by an environment.") - .build(); } diff --git a/engine/packages/pegboard/src/workflows/runner.rs b/engine/packages/pegboard/src/workflows/runner.rs index 43c1c876e6..c25c11a3d1 100644 --- a/engine/packages/pegboard/src/workflows/runner.rs +++ b/engine/packages/pegboard/src/workflows/runner.rs @@ -1,6 +1,7 @@ use futures_util::{FutureExt, StreamExt, TryStreamExt}; use gas::prelude::*; use rivet_data::converted::{ActorNameKeyData, MetadataKeyData, RunnerByKeyKeyData}; +use rivet_metrics::KeyValue; use rivet_runner_protocol::{self as protocol, PROTOCOL_VERSION, versioned}; use universaldb::{ options::{ConflictRangeType, StreamingMode}, @@ -9,7 +10,7 @@ use universaldb::{ use universalpubsub::PublishOpts; use vbare::OwnedVersionedData; -use crate::{keys, workflows::actor::Allocate}; +use crate::{keys, metrics, workflows::actor::Allocate}; /// How long after last ping before considering a runner ineligible for allocation. pub const RUNNER_ELIGIBLE_THRESHOLD_MS: i64 = util::duration::seconds(10); @@ -989,11 +990,11 @@ pub(crate) async fn allocate_pending_actors( input: &AllocatePendingActorsInput, ) -> Result { // NOTE: This txn should closely resemble the one found in the allocate_actor activity of the actor wf - let res = ctx + let (allocations, pending_actor_count) = ctx .udb()? .run(|tx| async move { let tx = tx.with_subspace(keys::subspace()); - let mut results = Vec::new(); + let mut allocations = Vec::new(); let pending_actor_subspace = keys::subspace().subspace( &keys::ns::PendingActorByRunnerNameSelectorKey::subspace( @@ -1010,6 +1011,7 @@ pub(crate) async fn allocate_pending_actors( // the one we choose Snapshot, ); + let mut pending_actor_count = 0; let ping_threshold_ts = util::timestamp::now() - RUNNER_ELIGIBLE_THRESHOLD_MS; 'queue_loop: loop { @@ -1017,6 +1019,8 @@ pub(crate) async fn allocate_pending_actors( break; }; + pending_actor_count += 1; + let (queue_key, generation) = tx.read_entry::(&queue_entry)?; @@ -1115,23 +1119,33 @@ pub(crate) async fn allocate_pending_actors( generation, )?; - results.push(ActorAllocation { + allocations.push(ActorAllocation { actor_id: queue_key.actor_id, signal: Allocate { runner_id: old_runner_alloc_key.runner_id, runner_workflow_id: old_runner_alloc_key_data.workflow_id, }, }); + + pending_actor_count -= 1; continue 'queue_loop; } } - Ok(results) + Ok((allocations, pending_actor_count)) }) .custom_instrument(tracing::info_span!("runner_allocate_pending_actors_tx")) .await?; - Ok(AllocatePendingActorsOutput { allocations: res }) + metrics::ACTOR_PENDING_ALLOCATION.record( + pending_actor_count as f64, + &[ + KeyValue::new("namespace_id", input.namespace_id.to_string()), + KeyValue::new("runner_name", input.name.to_string()), + ], + ); + + Ok(AllocatePendingActorsOutput { allocations }) } #[derive(Debug, Serialize, Deserialize, Hash)]