From 16339147fb271090f54894d0f6e56e8fc4184e2c Mon Sep 17 00:00:00 2001 From: MasterPtato <23087326+MasterPtato@users.noreply.github.com> Date: Wed, 5 Nov 2025 02:21:24 +0000 Subject: [PATCH 1/3] chore: misc bug fixes, add logs for outbound req (#3332) --- engine/packages/gasoline/src/ctx/operation.rs | 2 +- engine/packages/pegboard-gateway/src/lib.rs | 1 - engine/packages/pegboard-serverless/src/lib.rs | 10 ++++++++++ .../src/workflows/actor/{actor_keys.rs => keys.rs} | 7 +++---- engine/packages/pegboard/src/workflows/actor/mod.rs | 11 ++++++----- engine/packages/pegboard/src/workflows/runner.rs | 2 -- 6 files changed, 20 insertions(+), 13 deletions(-) rename engine/packages/pegboard/src/workflows/actor/{actor_keys.rs => keys.rs} (97%) diff --git a/engine/packages/gasoline/src/ctx/operation.rs b/engine/packages/gasoline/src/ctx/operation.rs index 99fbd6071f..167b0f9af4 100644 --- a/engine/packages/gasoline/src/ctx/operation.rs +++ b/engine/packages/gasoline/src/ctx/operation.rs @@ -29,7 +29,7 @@ pub struct OperationCtx { pools: rivet_pools::Pools, cache: rivet_cache::Cache, msg_ctx: MessageCtx, - from_workflow: bool, + pub(crate) from_workflow: bool, } impl OperationCtx { diff --git a/engine/packages/pegboard-gateway/src/lib.rs b/engine/packages/pegboard-gateway/src/lib.rs index 7542fdfe96..39bad2a7c2 100644 --- a/engine/packages/pegboard-gateway/src/lib.rs +++ b/engine/packages/pegboard-gateway/src/lib.rs @@ -364,7 +364,6 @@ impl CustomServeTrait for PegboardGateway { ) => { tracing::debug!(?close, "server closed websocket"); - if open_msg.can_hibernate && close.retry { // Successful closure return Err(WebSocketServiceRetry.build()); diff --git a/engine/packages/pegboard-serverless/src/lib.rs b/engine/packages/pegboard-serverless/src/lib.rs index a5125a90a1..19d9c9386c 100644 --- a/engine/packages/pegboard-serverless/src/lib.rs +++ b/engine/packages/pegboard-serverless/src/lib.rs @@ -387,6 +387,16 @@ async fn outbound_handler( return Ok(()); } + 
Err(sse::Error::InvalidStatusCode(code, res)) => { + let body = res + .text() + .await + .unwrap_or_else(|_| "".to_string()); + bail!( + "invalid status code ({code}):\n{}", + util::safe_slice(&body, 0, 512) + ); + } Err(err) => return Err(err.into()), } } diff --git a/engine/packages/pegboard/src/workflows/actor/actor_keys.rs b/engine/packages/pegboard/src/workflows/actor/keys.rs similarity index 97% rename from engine/packages/pegboard/src/workflows/actor/actor_keys.rs rename to engine/packages/pegboard/src/workflows/actor/keys.rs index ec2746e1df..f63c5d95af 100644 --- a/engine/packages/pegboard/src/workflows/actor/actor_keys.rs +++ b/engine/packages/pegboard/src/workflows/actor/keys.rs @@ -241,21 +241,20 @@ pub async fn reserve_actor_key( input.name.clone(), input.key.clone(), )); - let (start, end) = actor_key_subspace.range(); let mut stream = tx.get_ranges_keyvalues( universaldb::RangeOption { mode: StreamingMode::Iterator, - ..(start, end).into() + ..(&actor_key_subspace).into() }, Serializable, ); while let Some(entry) = stream.try_next().await? 
{ - let (_idx_key, data) = tx.read_entry::(&entry)?; + let (idx_key, data) = tx.read_entry::(&entry)?; if !data.is_destroyed { return Ok(ReserveActorKeyOutput::ExistingActor { - existing_actor_id: _idx_key.actor_id, + existing_actor_id: idx_key.actor_id, }); } } diff --git a/engine/packages/pegboard/src/workflows/actor/mod.rs b/engine/packages/pegboard/src/workflows/actor/mod.rs index e657fad85f..8659a6a523 100644 --- a/engine/packages/pegboard/src/workflows/actor/mod.rs +++ b/engine/packages/pegboard/src/workflows/actor/mod.rs @@ -5,8 +5,8 @@ use rivet_types::actors::CrashPolicy; use crate::{errors, workflows::runner::AllocatePendingActorsInput}; -mod actor_keys; mod destroy; +mod keys; mod runtime; mod setup; @@ -147,7 +147,7 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()> .await?; if let Some(key) = &input.key { - match actor_keys::reserve_key( + match keys::reserve_key( ctx, input.namespace_id, input.name.clone(), @@ -156,8 +156,8 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()> ) .await? { - actor_keys::ReserveKeyOutput::Success => {} - actor_keys::ReserveKeyOutput::ForwardToDatacenter { dc_label } => { + keys::ReserveKeyOutput::Success => {} + keys::ReserveKeyOutput::ForwardToDatacenter { dc_label } => { ctx.msg(Failed { error: errors::Actor::KeyReservedInDifferentDatacenter { datacenter_label: dc_label, @@ -181,7 +181,7 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()> return Ok(()); } - actor_keys::ReserveKeyOutput::KeyExists { existing_actor_id } => { + keys::ReserveKeyOutput::KeyExists { existing_actor_id } => { ctx.msg(Failed { error: errors::Actor::DuplicateKey { key: key.clone(), @@ -696,6 +696,7 @@ pub struct Lost { /// Immediately reschedules the actor regardless of its crash policy. pub force_reschedule: bool, /// Resets the rescheduling retry count to 0. 
+ #[serde(default)] pub reset_rescheduling: bool, } diff --git a/engine/packages/pegboard/src/workflows/runner.rs b/engine/packages/pegboard/src/workflows/runner.rs index 14cd0d42a5..33c4840d0c 100644 --- a/engine/packages/pegboard/src/workflows/runner.rs +++ b/engine/packages/pegboard/src/workflows/runner.rs @@ -1024,8 +1024,6 @@ pub(crate) async fn allocate_pending_actors( let mut stream = tx.get_ranges_keyvalues( universaldb::RangeOption { mode: StreamingMode::Iterator, - // Containers bin pack so we reverse the order - reverse: true, ..(&runner_alloc_subspace).into() }, // NOTE: This is not Serializable because we don't want to conflict with all of the From f0f4d2b20b55a7900f04f3261d3b968fd02d3b15 Mon Sep 17 00:00:00 2001 From: MasterPtato <23087326+MasterPtato@users.noreply.github.com> Date: Wed, 5 Nov 2025 02:21:24 +0000 Subject: [PATCH 2/3] chore: change log levels (#3333) --- .../api-public/src/runner_configs/upsert.rs | 2 +- engine/packages/cache-purge/src/lib.rs | 6 +--- engine/packages/engine/src/main.rs | 2 +- engine/packages/epoxy/src/http_client.rs | 8 ++--- .../epoxy/src/ops/explicit_prepare.rs | 4 +-- .../packages/epoxy/src/replica/decide_path.rs | 4 +-- .../epoxy/src/replica/lead_consensus.rs | 2 +- .../epoxy/src/replica/message_request.rs | 6 ++-- .../epoxy/src/replica/messages/accept.rs | 2 +- .../epoxy/src/replica/messages/accepted.rs | 2 +- .../epoxy/src/replica/messages/commit.rs | 2 +- .../epoxy/src/replica/messages/committed.rs | 2 +- .../replica/messages/download_instances.rs | 2 +- .../epoxy/src/replica/messages/pre_accept.rs | 2 +- .../epoxy/src/replica/messages/prepare.rs | 2 +- .../src/workflows/coordinator/reconfigure.rs | 22 ++++++------ .../coordinator/replica_status_change.rs | 22 ++++++------ .../epoxy/src/workflows/replica/setup.rs | 20 +++++------ engine/packages/gasoline/src/ctx/message.rs | 2 +- engine/packages/gasoline/src/ctx/workflow.rs | 2 +- engine/packages/gasoline/src/worker.rs | 5 ++- engine/packages/guard/src/lib.rs | 
2 +- engine/packages/guard/src/routing/runner.rs | 2 +- engine/packages/metrics/src/providers.rs | 2 +- .../packages/pegboard/src/workflows/runner.rs | 5 ++- engine/packages/runtime/src/traces.rs | 2 +- engine/packages/service-manager/src/lib.rs | 36 +++++++++---------- .../packages/tracing-reconfigure/src/lib.rs | 8 ++--- .../src/driver/postgres/mod.rs | 4 +-- engine/packages/universalpubsub/src/pubsub.rs | 4 +-- 30 files changed, 92 insertions(+), 94 deletions(-) diff --git a/engine/packages/api-public/src/runner_configs/upsert.rs b/engine/packages/api-public/src/runner_configs/upsert.rs index b13d311ca4..ac532d3b18 100644 --- a/engine/packages/api-public/src/runner_configs/upsert.rs +++ b/engine/packages/api-public/src/runner_configs/upsert.rs @@ -171,7 +171,7 @@ async fn upsert_inner( ) .await { - tracing::warn!(?err, runner_name = ?path.runner_name, "failed to refresh runner config metadata"); + tracing::warn!(?err, runner_name=?path.runner_name, "failed to refresh runner config metadata"); } } else { tracing::debug!("endpoint config unchanged, skipping metadata refresh"); diff --git a/engine/packages/cache-purge/src/lib.rs b/engine/packages/cache-purge/src/lib.rs index 357b779986..a08b0a9c78 100644 --- a/engine/packages/cache-purge/src/lib.rs +++ b/engine/packages/cache-purge/src/lib.rs @@ -5,13 +5,11 @@ use universalpubsub::NextOutput; #[tracing::instrument(skip_all)] pub async fn start(config: rivet_config::Config, pools: rivet_pools::Pools) -> Result<()> { - tracing::info!("starting cache purge subscriber service"); - // Subscribe to cache purge updates let ups = pools.ups()?; let mut sub = ups.subscribe(CACHE_PURGE_TOPIC).await?; - tracing::info!(subject = ?CACHE_PURGE_TOPIC, "subscribed to cache purge updates"); + tracing::info!(subject=?CACHE_PURGE_TOPIC, "subscribed to cache purge updates"); // Get cache instance let cache = rivet_cache::CacheInner::from_env(&config, pools)?; @@ -42,7 +40,5 @@ pub async fn start(config: rivet_config::Config, pools: 
rivet_pools::Pools) -> R } } - tracing::warn!("cache purge subscriber service stopped"); - Ok(()) } diff --git a/engine/packages/engine/src/main.rs b/engine/packages/engine/src/main.rs index f1129d2f13..b352e28ce4 100644 --- a/engine/packages/engine/src/main.rs +++ b/engine/packages/engine/src/main.rs @@ -25,7 +25,7 @@ async fn main_inner() -> Result<()> { // Load config let config = rivet_config::Config::load(&cli.config).await?; - tracing::info!(config = ?*config, "loaded config"); + tracing::info!(config=?*config, "loaded config"); // Initialize telemetry (does nothing if telemetry is disabled) let _guard = rivet_telemetry::init(&config); diff --git a/engine/packages/epoxy/src/http_client.rs b/engine/packages/epoxy/src/http_client.rs index 895fbc3716..2b5a859783 100644 --- a/engine/packages/epoxy/src/http_client.rs +++ b/engine/packages/epoxy/src/http_client.rs @@ -57,7 +57,7 @@ where ) .collect::>() .await; - tracing::info!(?quorum_size, len = ?responses.len(), ?quorum_type, "fanout quorum size"); + tracing::debug!(?quorum_size, len = ?responses.len(), ?quorum_type, "fanout quorum size"); // Choose how many successful responses we need before considering a success let target_responses = match quorum_type { @@ -115,7 +115,7 @@ pub async fn send_message_to_address( let to_replica_id = request.to_replica_id; if from_replica_id == to_replica_id { - tracing::info!( + tracing::debug!( to_replica = to_replica_id, "sending message to replica directly" ); @@ -126,7 +126,7 @@ pub async fn send_message_to_address( let mut replica_url = url::Url::parse(&replica_url)?; replica_url.set_path(&format!("/v{PROTOCOL_VERSION}/epoxy/message")); - tracing::info!( + tracing::debug!( to_replica = to_replica_id, %replica_url, "sending message to replica via http" @@ -183,7 +183,7 @@ pub async fn send_message_to_address( let body = response.bytes().await?; let response_body = versioned::Response::deserialize(&body)?; - tracing::info!( + tracing::debug!( to_replica = to_replica_id, 
"successfully sent message via http" ); diff --git a/engine/packages/epoxy/src/ops/explicit_prepare.rs b/engine/packages/epoxy/src/ops/explicit_prepare.rs index 2445f9d034..28a8c15797 100644 --- a/engine/packages/epoxy/src/ops/explicit_prepare.rs +++ b/engine/packages/epoxy/src/ops/explicit_prepare.rs @@ -28,7 +28,7 @@ pub async fn epoxy_explicit_prepare( let replica_id = ctx.config().epoxy_replica_id(); let instance = &input.instance; - tracing::info!( + tracing::debug!( ?instance, "starting explicit prepare for potentially failed replica" ); @@ -317,7 +317,7 @@ async fn restart_phase1( commands: commands.unwrap_or_else(|| vec![]), // Empty vec for no-op }; - tracing::info!( + tracing::debug!( ?instance, commands_count = proposal.commands.len(), "restarting phase1 with propose operation" diff --git a/engine/packages/epoxy/src/replica/decide_path.rs b/engine/packages/epoxy/src/replica/decide_path.rs index 1b7022728f..2c9112f7c8 100644 --- a/engine/packages/epoxy/src/replica/decide_path.rs +++ b/engine/packages/epoxy/src/replica/decide_path.rs @@ -9,7 +9,7 @@ pub fn decide_path( pre_accept_oks: Vec, payload: &protocol::Payload, ) -> Result { - tracing::info!(instance=?payload.instance, "deciding path"); + tracing::debug!(instance=?payload.instance, "deciding path"); let mut new_payload = payload.clone(); let mut path = protocol::Path::PathFast(protocol::PathFast { @@ -29,7 +29,7 @@ pub fn decide_path( // EPaxos Steps 11 (returns PathFast) continue; } else { - tracing::info!(?pre_accept_ok.deps, "received dissenting voice"); + tracing::debug!(?pre_accept_ok.deps, "received dissenting voice"); // EPaxos Step 13 let new_deps = utils::union_deps(new_payload.deps, pre_accept_ok.deps); diff --git a/engine/packages/epoxy/src/replica/lead_consensus.rs b/engine/packages/epoxy/src/replica/lead_consensus.rs index 5af4edda10..7235650e43 100644 --- a/engine/packages/epoxy/src/replica/lead_consensus.rs +++ b/engine/packages/epoxy/src/replica/lead_consensus.rs @@ -12,7 +12,7 @@ 
pub async fn lead_consensus( replica_id: protocol::ReplicaId, proposal: protocol::Proposal, ) -> Result { - tracing::info!(?replica_id, "leading consensus"); + tracing::debug!(?replica_id, "leading consensus"); // EPaxos Step 1 let instance_num_key = keys::replica::InstanceNumberKey; diff --git a/engine/packages/epoxy/src/replica/message_request.rs b/engine/packages/epoxy/src/replica/message_request.rs index 7466c3331c..3d96e5c2bb 100644 --- a/engine/packages/epoxy/src/replica/message_request.rs +++ b/engine/packages/epoxy/src/replica/message_request.rs @@ -14,7 +14,7 @@ pub async fn message_request( let kind = match request.kind { protocol::RequestKind::UpdateConfigRequest(req) => { - tracing::info!( + tracing::debug!( epoch = ?req.config.epoch, replica_count = req.config.replicas.len(), "received configuration update request" @@ -103,7 +103,7 @@ pub async fn message_request( } protocol::RequestKind::CoordinatorUpdateReplicaStatusRequest(req) => { // Send signal to coordinator workflow - tracing::info!( + tracing::debug!( ?current_replica_id, update_replica_id=?req.replica_id, update_status=?req.status, @@ -124,7 +124,7 @@ pub async fn message_request( } protocol::RequestKind::BeginLearningRequest(req) => { // Send signal to replica workflow - tracing::info!(?current_replica_id, "received begin learning request"); + tracing::debug!(?current_replica_id, "received begin learning request"); ctx.signal(crate::workflows::replica::BeginLearning { config: req.config.clone().into(), diff --git a/engine/packages/epoxy/src/replica/messages/accept.rs b/engine/packages/epoxy/src/replica/messages/accept.rs index 35744520cc..90c15d48f9 100644 --- a/engine/packages/epoxy/src/replica/messages/accept.rs +++ b/engine/packages/epoxy/src/replica/messages/accept.rs @@ -17,7 +17,7 @@ pub async fn accept( instance, } = accept_req.payload; - tracing::info!(?replica_id, ?instance, "handling accept message"); + tracing::debug!(?replica_id, ?instance, "handling accept message"); // Validate 
ballot let current_ballot = ballot::get_ballot(tx, replica_id).await?; diff --git a/engine/packages/epoxy/src/replica/messages/accepted.rs b/engine/packages/epoxy/src/replica/messages/accepted.rs index b91d5e24e4..63ea67f678 100644 --- a/engine/packages/epoxy/src/replica/messages/accepted.rs +++ b/engine/packages/epoxy/src/replica/messages/accepted.rs @@ -18,7 +18,7 @@ pub async fn accepted( instance, } = payload; - tracing::info!(?replica_id, ?instance, "handling accepted message"); + tracing::debug!(?replica_id, ?instance, "handling accepted message"); // Create accepted log entry let current_ballot = ballot::get_ballot(tx, replica_id).await?; diff --git a/engine/packages/epoxy/src/replica/messages/commit.rs b/engine/packages/epoxy/src/replica/messages/commit.rs index e85aff3d1a..01ca1408e7 100644 --- a/engine/packages/epoxy/src/replica/messages/commit.rs +++ b/engine/packages/epoxy/src/replica/messages/commit.rs @@ -19,7 +19,7 @@ pub async fn commit( instance, } = commit_req.payload; - tracing::info!(?replica_id, ?instance, "handling commit message"); + tracing::debug!(?replica_id, ?instance, "handling commit message"); // EPaxos Step 24 let current_ballot = ballot::get_ballot(tx, replica_id).await?; diff --git a/engine/packages/epoxy/src/replica/messages/committed.rs b/engine/packages/epoxy/src/replica/messages/committed.rs index 3ad726157d..9e95ee5626 100644 --- a/engine/packages/epoxy/src/replica/messages/committed.rs +++ b/engine/packages/epoxy/src/replica/messages/committed.rs @@ -18,7 +18,7 @@ pub async fn committed( instance, } = payload; - tracing::info!(?replica_id, ?instance, "handling committed message"); + tracing::debug!(?replica_id, ?instance, "handling committed message"); // EPaxos Step 21: Create committed log entry let current_ballot = ballot::get_ballot(tx, replica_id).await?; diff --git a/engine/packages/epoxy/src/replica/messages/download_instances.rs b/engine/packages/epoxy/src/replica/messages/download_instances.rs index 
2f7c9b37e7..7942a61252 100644 --- a/engine/packages/epoxy/src/replica/messages/download_instances.rs +++ b/engine/packages/epoxy/src/replica/messages/download_instances.rs @@ -12,7 +12,7 @@ pub async fn download_instances( replica_id: ReplicaId, req: protocol::DownloadInstancesRequest, ) -> Result> { - tracing::info!(?replica_id, "handling download instances message"); + tracing::debug!(?replica_id, "handling download instances message"); let mut entries = Vec::new(); let subspace = keys::subspace(replica_id); diff --git a/engine/packages/epoxy/src/replica/messages/pre_accept.rs b/engine/packages/epoxy/src/replica/messages/pre_accept.rs index ecbcde3327..69091386b1 100644 --- a/engine/packages/epoxy/src/replica/messages/pre_accept.rs +++ b/engine/packages/epoxy/src/replica/messages/pre_accept.rs @@ -11,7 +11,7 @@ pub async fn pre_accept( replica_id: protocol::ReplicaId, pre_accept_req: protocol::PreAcceptRequest, ) -> Result { - tracing::info!(?replica_id, "handling pre-accept message"); + tracing::debug!(?replica_id, "handling pre-accept message"); let protocol::Payload { proposal, diff --git a/engine/packages/epoxy/src/replica/messages/prepare.rs b/engine/packages/epoxy/src/replica/messages/prepare.rs index 19d50cb816..a3f76d7f73 100644 --- a/engine/packages/epoxy/src/replica/messages/prepare.rs +++ b/engine/packages/epoxy/src/replica/messages/prepare.rs @@ -11,7 +11,7 @@ pub async fn prepare( replica_id: protocol::ReplicaId, prepare_req: protocol::PrepareRequest, ) -> Result { - tracing::info!(?replica_id, "handling prepare message"); + tracing::debug!(?replica_id, "handling prepare message"); let protocol::PrepareRequest { ballot, instance } = prepare_req; diff --git a/engine/packages/epoxy/src/workflows/coordinator/reconfigure.rs b/engine/packages/epoxy/src/workflows/coordinator/reconfigure.rs index fbc5ccca51..e0ab58cae8 100644 --- a/engine/packages/epoxy/src/workflows/coordinator/reconfigure.rs +++ 
b/engine/packages/epoxy/src/workflows/coordinator/reconfigure.rs @@ -63,7 +63,7 @@ pub async fn check_config_changes( ctx: &ActivityCtx, _input: &CheckConfigChangesInput, ) -> Result> { - tracing::info!("checking for config changes"); + tracing::debug!("checking for config changes"); let state = ctx.state::()?; @@ -107,7 +107,7 @@ pub async fn check_config_changes( .collect(); if new_replicas.is_empty() { - tracing::info!("no new replicas found"); + tracing::debug!("no new replicas found"); return Ok(None); } @@ -144,7 +144,7 @@ pub async fn health_check_new_replicas( return Ok(false); } - tracing::info!( + tracing::debug!( new_replicas = ?input.new_replicas, "health checking new replicas" ); @@ -154,7 +154,7 @@ pub async fn health_check_new_replicas( let replica_id = replica.replica_id; async move { - tracing::info!(?replica_id, "sending health check to replica"); + tracing::debug!(?replica_id, "sending health check to replica"); let from_replica_id = ctx.config().epoxy_replica_id(); let request = protocol::Request { @@ -171,7 +171,7 @@ pub async fn health_check_new_replicas( .await .with_context(|| format!("health check failed for replica {}", replica_id))?; - tracing::info!(?replica_id, "health check successful"); + tracing::debug!(?replica_id, "health check successful"); Ok(()) } }); @@ -199,7 +199,7 @@ pub async fn add_replicas_as_joining( state.config.replicas.push(replica.clone().into()); } - tracing::info!("added {} replicas as joining", input.new_replicas.len()); + tracing::debug!("added {} replicas as joining", input.new_replicas.len()); // IMPORTANT: Do not increment epoch at this stage, despite what the EPaxos paper recommends. // See epoxy/README.md for more details. 
@@ -231,7 +231,7 @@ pub async fn send_begin_learning( let config = config.clone(); async move { - tracing::info!(?replica_id, "sending begin learning to replica"); + tracing::debug!(?replica_id, "sending begin learning to replica"); let request = protocol::Request { from_replica_id: ctx.config().epoxy_replica_id(), @@ -244,7 +244,7 @@ pub async fn send_begin_learning( crate::http_client::send_message(&ApiCtx::new_from_activity(&ctx)?, &config, request) .await?; - tracing::info!(?replica_id, "begin learning sent successfully"); + tracing::debug!(?replica_id, "begin learning sent successfully"); Ok(()) } }); @@ -266,21 +266,21 @@ fn should_abort_reconfigure( .iter() .find(|x| x.datacenter_label as u64 == replica.replica_id) else { - tracing::info!( + tracing::debug!( "config changed during reconfigure (replica removed), aborting reconfigure" ); return Ok(true); }; if url::Url::parse(&replica.api_peer_url)? != current_dc.peer_url { - tracing::info!( + tracing::debug!( "config changed during reconfigure (api_peer_url changed), aborting reconfigure" ); return Ok(true); } if url::Url::parse(&replica.guard_url)? 
!= current_dc.public_url { - tracing::info!( + tracing::debug!( "config changed during reconfigure (guard_url changed), aborting reconfigure" ); return Ok(true); diff --git a/engine/packages/epoxy/src/workflows/coordinator/replica_status_change.rs b/engine/packages/epoxy/src/workflows/coordinator/replica_status_change.rs index 7445616136..44a3abbe61 100644 --- a/engine/packages/epoxy/src/workflows/coordinator/replica_status_change.rs +++ b/engine/packages/epoxy/src/workflows/coordinator/replica_status_change.rs @@ -85,9 +85,9 @@ pub async fn update_replica_status( // Update status replica_state.status = input.new_status.clone().into(); - tracing::info!( - replica_id = ?input.replica_id, - new_status = ?input.new_status, + tracing::debug!( + replica_id=?input.replica_id, + new_status=?input.new_status, "updated replica status" ); @@ -103,7 +103,7 @@ pub async fn increment_epoch(ctx: &ActivityCtx, _input: &IncrementEpochInput) -> state.config.epoch += 1; - tracing::info!(new_epoch = state.config.epoch, "incremented epoch"); + tracing::debug!(new_epoch = state.config.epoch, "incremented epoch"); Ok(()) } @@ -119,7 +119,7 @@ pub async fn update_replica_urls(ctx: &ActivityCtx, _input: &UpdateReplicaUrlsIn for replica in state.config.replicas.iter_mut() { let Some(dc) = ctx.config().dc_for_label(replica.replica_id as u16) else { tracing::warn!( - replica_id = ?replica.replica_id, + replica_id=?replica.replica_id, "datacenter not found for replica, skipping url update" ); continue; @@ -128,10 +128,10 @@ pub async fn update_replica_urls(ctx: &ActivityCtx, _input: &UpdateReplicaUrlsIn replica.api_peer_url = dc.peer_url.to_string(); replica.guard_url = dc.public_url.to_string(); - tracing::info!( - replica_id = ?replica.replica_id, - api_peer_url = ?dc.peer_url, - guard_url = ?dc.public_url, + tracing::debug!( + replica_id=?replica.replica_id, + api_peer_url=?dc.peer_url, + guard_url=?dc.public_url, "updated replica urls" ); } @@ -156,7 +156,7 @@ pub async fn 
notify_all_replicas( let config: protocol::ClusterConfig = state.config.clone().into(); - tracing::info!( + tracing::debug!( epoch = config.epoch, replica_count = config.replicas.len(), "notifying all replicas of config change" @@ -180,7 +180,7 @@ pub async fn notify_all_replicas( .await .with_context(|| format!("failed to update config for replica {}", replica_id))?; - tracing::info!(?replica_id, "config update sent"); + tracing::debug!(?replica_id, "config update sent"); Ok(()) } }); diff --git a/engine/packages/epoxy/src/workflows/replica/setup.rs b/engine/packages/epoxy/src/workflows/replica/setup.rs index 5f7783032b..c85993e6d5 100644 --- a/engine/packages/epoxy/src/workflows/replica/setup.rs +++ b/engine/packages/epoxy/src/workflows/replica/setup.rs @@ -117,7 +117,7 @@ pub async fn setup_replica(ctx: &mut WorkflowCtx, _input: &super::Input) -> Resu state.total_recovered_keys += output.recovered_count; } else { // No more keys to recover - tracing::info!( + tracing::debug!( total_recovered_keys = state.total_recovered_keys, "finished recovering keys" ); @@ -172,7 +172,7 @@ pub async fn download_instances_chunk( let config = &input.learning_config; let proto_config: protocol::ClusterConfig = input.learning_config.clone().into(); - tracing::info!( + tracing::debug!( from_replica_id = ?input.from_replica_id, replica_progress = format!("{}/{}", input.replica_index, input.total_replicas - 1), // -1 to exclude self total_downloaded_instances = input.total_downloaded_instances, @@ -201,7 +201,7 @@ pub async fn download_instances_chunk( }; let instances = download_response.instances; - tracing::info!(instance_count = instances.len(), "received instances"); + tracing::debug!(instance_count = instances.len(), "received instances"); // Apply each log entry from the downloaded instances let total_entries = instances.len(); @@ -230,7 +230,7 @@ pub async fn download_instances_chunk( applied_count += 1; } - tracing::info!( + tracing::debug!( total_entries, applied_count, 
skipped_count, @@ -269,7 +269,7 @@ async fn apply_log_entry( ) -> Result<()> { let replica_id = ctx.config().epoxy_replica_id(); - tracing::info!( + tracing::debug!( ?instance, ?log_entry.state, "replaying log entry" @@ -334,7 +334,7 @@ async fn apply_log_entry( .custom_instrument(tracing::info_span!("apply_log_entry_tx")) .await?; - tracing::info!( + tracing::debug!( ?instance, ?log_entry.state, "successfully replayed log entry" @@ -392,7 +392,7 @@ pub async fn recover_keys_chunk( ) -> Result { let replica_id = ctx.config().epoxy_replica_id(); - tracing::info!( + tracing::debug!( ?replica_id, total_recovered_keys = input.total_recovered_keys, after_key_len = input.after_key.as_ref().map(|k| k.len()), @@ -534,7 +534,7 @@ pub async fn recover_keys_chunk( None }; - tracing::info!( + tracing::debug!( ?replica_id, recovered_count, scanned_count, @@ -801,7 +801,7 @@ pub async fn notify_active(ctx: &ActivityCtx, input: &NotifyActiveInput) -> Resu let config = &input.learning_config; let proto_config: protocol::ClusterConfig = config.clone().into(); - tracing::info!("notifying coordinator that replica is active"); + tracing::debug!("notifying coordinator that replica is active"); // Send status update to coordinator let request = protocol::Request { @@ -818,6 +818,6 @@ pub async fn notify_active(ctx: &ActivityCtx, input: &NotifyActiveInput) -> Resu crate::http_client::send_message(&ApiCtx::new_from_activity(&ctx)?, &proto_config, request) .await?; - tracing::info!("notified coordinator of active status"); + tracing::debug!("notified coordinator of active status"); Ok(()) } diff --git a/engine/packages/gasoline/src/ctx/message.rs b/engine/packages/gasoline/src/ctx/message.rs index 6af2b3e50e..b4e1d73cdc 100644 --- a/engine/packages/gasoline/src/ctx/message.rs +++ b/engine/packages/gasoline/src/ctx/message.rs @@ -162,7 +162,7 @@ impl MessageCtx { ) .await { - tracing::warn!(?err, "publish message failed, trying again"); + tracing::debug!(?err, "publish message failed, 
trying again"); continue; } diff --git a/engine/packages/gasoline/src/ctx/workflow.rs b/engine/packages/gasoline/src/ctx/workflow.rs index 25d79ea7d0..8987624f31 100644 --- a/engine/packages/gasoline/src/ctx/workflow.rs +++ b/engine/packages/gasoline/src/ctx/workflow.rs @@ -1051,7 +1051,7 @@ impl WorkflowCtx { // No-op if duration <= 0 { - if !replay && duration < -50 { + if !replay && duration < -25 { tracing::warn!(%duration, "tried to sleep for a negative duration"); } } diff --git a/engine/packages/gasoline/src/worker.rs b/engine/packages/gasoline/src/worker.rs index 471276146d..43b1680040 100644 --- a/engine/packages/gasoline/src/worker.rs +++ b/engine/packages/gasoline/src/worker.rs @@ -152,7 +152,10 @@ impl Worker { for (workflow_id, wf) in &self.running_workflows { if wf.stop.send(()).is_err() { - tracing::warn!(?workflow_id, "stop channel closed"); + tracing::warn!( + ?workflow_id, + "stop channel closed, workflow likely already stopped" + ); } } diff --git a/engine/packages/guard/src/lib.rs b/engine/packages/guard/src/lib.rs index f7533372e3..a09e0fd912 100644 --- a/engine/packages/guard/src/lib.rs +++ b/engine/packages/guard/src/lib.rs @@ -24,7 +24,7 @@ pub async fn start(config: rivet_config::Config, pools: rivet_pools::Pools) -> R // Initialize with a default CryptoProvider for rustls let provider = rustls::crypto::ring::default_provider(); if provider.install_default().is_err() { - tracing::warn!("crypto provider already installed in this process"); + tracing::debug!("crypto provider already installed in this process"); } // Share shared context diff --git a/engine/packages/guard/src/routing/runner.rs b/engine/packages/guard/src/routing/runner.rs index f090d3b49c..e616d86731 100644 --- a/engine/packages/guard/src/routing/runner.rs +++ b/engine/packages/guard/src/routing/runner.rs @@ -54,7 +54,7 @@ async fn route_runner_internal( // Validate that the host is valid for the current datacenter let current_dc = ctx.config().topology().current_dc()?; if 
!current_dc.is_valid_regional_host(host) { - tracing::warn!(?host, datacenter = ?current_dc.name, "invalid host for current datacenter"); + tracing::warn!(?host, datacenter=?current_dc.name, "invalid host for current datacenter"); // Determine valid hosts for error message let valid_hosts = if let Some(hosts) = &current_dc.valid_hosts { diff --git a/engine/packages/metrics/src/providers.rs b/engine/packages/metrics/src/providers.rs index aac9a1b092..72771180ee 100644 --- a/engine/packages/metrics/src/providers.rs +++ b/engine/packages/metrics/src/providers.rs @@ -66,7 +66,7 @@ pub fn set_sampler_ratio(ratio: f64) -> anyhow::Result<()> { .ok_or_else(|| anyhow::anyhow!("sampler not initialized"))?; sampler.set_ratio(ratio); - tracing::info!(?ratio, "updated sampler ratio"); + tracing::debug!(?ratio, "updated sampler ratio"); Ok(()) } diff --git a/engine/packages/pegboard/src/workflows/runner.rs b/engine/packages/pegboard/src/workflows/runner.rs index 33c4840d0c..43c1c876e6 100644 --- a/engine/packages/pegboard/src/workflows/runner.rs +++ b/engine/packages/pegboard/src/workflows/runner.rs @@ -253,7 +253,10 @@ pub async fn pegboard_runner(ctx: &mut WorkflowCtx, input: &Input) -> Result<()> true, ) = (&command.inner, state.draining) { - tracing::warn!(?actor_id, "attempt to schedule actor to draining runner"); + tracing::warn!( + ?actor_id, + "attempt to schedule actor to draining runner, reallocating" + ); let res = ctx .signal(crate::workflows::actor::Lost { diff --git a/engine/packages/runtime/src/traces.rs b/engine/packages/runtime/src/traces.rs index 182baea806..ebe015d4cb 100644 --- a/engine/packages/runtime/src/traces.rs +++ b/engine/packages/runtime/src/traces.rs @@ -152,7 +152,7 @@ pub fn reload_log_filter(filter_spec: &str) -> anyhow::Result<()> { // Reload the filter handle.reload(env_filter)?; - tracing::info!(?filter_spec, "reloaded log filter"); + tracing::debug!(?filter_spec, "reloaded log filter"); Ok(()) } diff --git 
a/engine/packages/service-manager/src/lib.rs b/engine/packages/service-manager/src/lib.rs index f48fee8900..0aa53e7e30 100644 --- a/engine/packages/service-manager/src/lib.rs +++ b/engine/packages/service-manager/src/lib.rs @@ -122,12 +122,12 @@ pub async fn start( services: Vec, ) -> Result<()> { // Spawn services - tracing::info!(services = ?services.len(), "starting services"); + tracing::info!(services=?services.len(), "starting services"); let mut join_set = tokio::task::JoinSet::new(); let cron_schedule = tokio_cron_scheduler::JobScheduler::new().await?; let mut sleep_indefinitely = false; for service in services { - tracing::debug!(name = %service.name, kind = ?service.kind, "server starting service"); + tracing::debug!(name=%service.name, kind=?service.kind, "server starting service"); match service.kind.behavior() { ServiceBehavior::Service => { @@ -138,21 +138,21 @@ pub async fn start( let config = config.clone(); let pools = pools.clone(); async move { - tracing::debug!(service = %service.name, "starting service"); + tracing::debug!(service=%service.name, "starting service"); loop { match (service.run)(config.clone(), pools.clone()).await { Result::Ok(_) => { - tracing::error!(service = %service.name, "service exited unexpectedly"); + tracing::error!(service=%service.name, "service exited unexpectedly"); } Err(err) => { - tracing::error!(service = %service.name, ?err, "service crashed"); + tracing::error!(service=%service.name, ?err, "service crashed"); } } tokio::time::sleep(Duration::from_secs(1)).await; - tracing::info!(service = %service.name, "restarting service"); + tracing::info!(service=%service.name, "restarting service"); } } }) @@ -166,20 +166,20 @@ pub async fn start( let config = config.clone(); let pools = pools.clone(); async move { - tracing::debug!(oneoff = %service.name, "starting oneoff"); + tracing::debug!(oneoff=%service.name, "starting oneoff"); loop { match (service.run)(config.clone(), pools.clone()).await { Result::Ok(_) => { - 
tracing::debug!(oneoff = %service.name, "oneoff finished"); + tracing::debug!(oneoff=%service.name, "oneoff finished"); break; } Err(err) => { - tracing::error!(oneoff = %service.name, ?err, "oneoff crashed"); + tracing::error!(oneoff=%service.name, ?err, "oneoff crashed"); tokio::time::sleep(Duration::from_secs(1)).await; - tracing::info!(oneoff = %service.name, "restarting oneoff"); + tracing::info!(oneoff=%service.name, "restarting oneoff"); } } } @@ -200,20 +200,20 @@ pub async fn start( let config = config.clone(); let pools = pools.clone(); async move { - tracing::debug!(cron = %service.name, "starting immediate cron"); + tracing::debug!(cron=%service.name, "starting immediate cron"); for attempt in 1..=8 { match (service.run)(config.clone(), pools.clone()).await { Result::Ok(_) => { - tracing::debug!(cron = %service.name, ?attempt, "cron finished"); + tracing::debug!(cron=%service.name, ?attempt, "cron finished"); break; } Err(err) => { - tracing::error!(cron = %service.name, ?attempt, ?err, "cron crashed"); + tracing::error!(cron=%service.name, ?attempt, ?err, "cron crashed"); tokio::time::sleep(Duration::from_secs(1)).await; - tracing::info!(cron = %service.name, ?attempt, "restarting cron"); + tracing::info!(cron=%service.name, ?attempt, "restarting cron"); } } } @@ -235,20 +235,20 @@ pub async fn start( let pools = pools.clone(); let service = service.clone(); Box::pin(async move { - tracing::debug!(cron = %service.name, ?notification, "running cron"); + tracing::debug!(cron=%service.name, ?notification, "running cron"); for attempt in 1..=8 { match (service.run)(config.clone(), pools.clone()).await { Result::Ok(_) => { - tracing::debug!(cron = %service.name, ?attempt, "cron finished"); + tracing::debug!(cron=%service.name, ?attempt, "cron finished"); return; } Err(err) => { - tracing::error!(cron = %service.name, ?attempt, ?err, "cron crashed"); + tracing::error!(cron=%service.name, ?attempt, ?err, "cron crashed"); 
tokio::time::sleep(Duration::from_secs(1)).await; - tracing::info!(cron = %service.name, ?attempt, "restarting cron"); + tracing::info!(cron=%service.name, ?attempt, "restarting cron"); } } } diff --git a/engine/packages/tracing-reconfigure/src/lib.rs b/engine/packages/tracing-reconfigure/src/lib.rs index 9151db195b..0d4110fd9a 100644 --- a/engine/packages/tracing-reconfigure/src/lib.rs +++ b/engine/packages/tracing-reconfigure/src/lib.rs @@ -13,20 +13,18 @@ pub struct SetTracingConfigMessage { #[tracing::instrument(skip_all)] pub async fn start(_config: rivet_config::Config, pools: rivet_pools::Pools) -> Result<()> { - tracing::info!("starting tracing reconfigure subscriber service"); - // Subscribe to tracing config updates let ups = pools.ups()?; let subject = "rivet.debug.tracing.config"; let mut sub = ups.subscribe(subject).await?; - tracing::info!(subject = ?subject, "subscribed to tracing config updates"); + tracing::debug!(subject = ?subject, "subscribed to tracing config updates"); // Process incoming messages while let Ok(NextOutput::Message(msg)) = sub.next().await { match serde_json::from_slice::<SetTracingConfigMessage>(&msg.payload) { Ok(update_msg) => { - tracing::info!( + tracing::debug!( filter = ?update_msg.filter, sampler_ratio = ?update_msg.sampler_ratio, "received tracing config update" @@ -76,7 +74,5 @@ pub async fn start(_config: rivet_config::Config, pools: rivet_pools::Pools) -> } } - tracing::warn!("tracing reconfigure subscriber service stopped"); - Ok(()) } diff --git a/engine/packages/universalpubsub/src/driver/postgres/mod.rs b/engine/packages/universalpubsub/src/driver/postgres/mod.rs index ca801e81b2..f60ba6f6f8 100644 --- a/engine/packages/universalpubsub/src/driver/postgres/mod.rs +++ b/engine/packages/universalpubsub/src/driver/postgres/mod.rs @@ -116,7 +116,7 @@ impl PostgresDriver { loop { match tokio_postgres::connect(&conn_str, tokio_postgres::NoTls).await { Result::Ok((new_client, conn)) => { - tracing::info!("postgres listen connection 
established"); + tracing::debug!("postgres listen connection established"); // Reset backoff on successful connection backoff = Backoff::default(); @@ -148,7 +148,7 @@ impl PostgresDriver { "re-subscribing to channels after reconnection" ); for channel in &channels { - tracing::info!(?channel, "re-subscribing to channel"); + tracing::debug!(?channel, "re-subscribing to channel"); if let Result::Err(e) = new_client .execute(&format!("LISTEN \"{}\"", channel), &[]) .await diff --git a/engine/packages/universalpubsub/src/pubsub.rs b/engine/packages/universalpubsub/src/pubsub.rs index ac6ff27696..a0a22d9fae 100644 --- a/engine/packages/universalpubsub/src/pubsub.rs +++ b/engine/packages/universalpubsub/src/pubsub.rs @@ -190,11 +190,11 @@ impl PubSub { match self.driver.publish(subject, encoded).await { Result::Ok(_) => break, Err(err) if !backoff.tick().await => { - tracing::info!(?err, "error publishing, cannot retry again"); + tracing::warn!(?err, "error publishing, cannot retry again"); return Err(crate::errors::Ups::PublishFailed.build().into()); } Err(err) => { - tracing::info!(?err, "error publishing, retrying"); + tracing::debug!(?err, "error publishing, retrying"); // Continue retrying } } From 3b4f5267d8776e39c4ba2821546f3da15ec11dc4 Mon Sep 17 00:00:00 2001 From: MasterPtato <23087326+MasterPtato@users.noreply.github.com> Date: Wed, 5 Nov 2025 02:21:24 +0000 Subject: [PATCH 3/3] fix: fix all dashboards (#3358) --- .../dev-host/grafana/dashboards/api.json | 212 +++--- .../dev-host/grafana/dashboards/cache.json | 643 ++++++++---------- .../dev-host/grafana/dashboards/futures.json | 78 ++- .../dev-host/grafana/dashboards/gasoline.json | 406 +++++------ .../dev-host/grafana/dashboards/guard.json | 358 +++++++--- engine/docker/dev-host/grafana/grafana.ini | 2 +- .../otel-collector-server/config.yaml | 11 + .../core/grafana/dashboards/api.json | 212 +++--- .../core/grafana/dashboards/cache.json | 643 ++++++++---------- .../core/grafana/dashboards/futures.json | 78 
++- .../core/grafana/dashboards/gasoline.json | 406 +++++------ .../core/grafana/dashboards/guard.json | 358 +++++++--- .../core/grafana/grafana.ini | 2 +- .../dc-a/otel-collector-server/config.yaml | 11 + .../dc-b/otel-collector-server/config.yaml | 11 + .../dc-c/otel-collector-server/config.yaml | 11 + .../dev-multidc-multinode/docker-compose.yml | 6 + .../core/grafana/dashboards/api.json | 212 +++--- .../core/grafana/dashboards/cache.json | 643 ++++++++---------- .../core/grafana/dashboards/futures.json | 78 ++- .../core/grafana/dashboards/gasoline.json | 406 +++++------ .../core/grafana/dashboards/guard.json | 358 +++++++--- .../dev-multidc/core/grafana/grafana.ini | 2 +- .../dc-a/otel-collector-server/config.yaml | 11 + .../dc-b/otel-collector-server/config.yaml | 11 + .../dc-c/otel-collector-server/config.yaml | 11 + engine/docker/dev-multidc/docker-compose.yml | 6 + .../docker/dev-multinode/docker-compose.yml | 2 + .../dev-multinode/grafana/dashboards/api.json | 212 +++--- .../grafana/dashboards/cache.json | 643 ++++++++---------- .../grafana/dashboards/futures.json | 78 ++- .../grafana/dashboards/gasoline.json | 406 +++++------ .../grafana/dashboards/guard.json | 358 +++++++--- .../docker/dev-multinode/grafana/grafana.ini | 2 +- .../otel-collector-server/config.yaml | 11 + engine/docker/dev/grafana/dashboards/api.json | 212 +++--- .../docker/dev/grafana/dashboards/cache.json | 643 ++++++++---------- .../dev/grafana/dashboards/futures.json | 78 ++- .../dev/grafana/dashboards/gasoline.json | 406 +++++------ .../docker/dev/grafana/dashboards/guard.json | 358 +++++++--- engine/docker/dev/grafana/grafana.ini | 2 +- .../dev/otel-collector-server/config.yaml | 11 + .../template/grafana-dashboards/api.json | 212 +++--- .../template/grafana-dashboards/cache.json | 643 ++++++++---------- .../template/grafana-dashboards/futures.json | 78 ++- .../template/grafana-dashboards/gasoline.json | 406 +++++------ .../template/grafana-dashboards/guard.json | 358 +++++++--- 
engine/docker/template/src/docker-compose.ts | 1 + .../template/src/services/core/grafana.ts | 2 +- .../services/edge/otel-collector-server.ts | 24 +- engine/packages/api-builder/src/middleware.rs | 6 +- engine/packages/gasoline/src/ctx/workflow.rs | 2 +- engine/packages/gasoline/src/metrics.rs | 4 +- engine/packages/metrics/src/buckets.rs | 14 +- engine/packages/metrics/src/providers.rs | 2 +- engine/packages/pegboard/src/metrics.rs | 54 +- .../packages/pegboard/src/workflows/runner.rs | 26 +- 57 files changed, 5140 insertions(+), 5300 deletions(-) diff --git a/engine/docker/dev-host/grafana/dashboards/api.json b/engine/docker/dev-host/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-host/grafana/dashboards/api.json +++ b/engine/docker/dev-host/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n 
time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS 
smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER 
BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND 
$__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 
1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND 
Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 
1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n 
Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND 
ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS 
smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": 
"^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n 
AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM 
otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE 
MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-host/grafana/dashboards/cache.json 
b/engine/docker/dev-host/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-host/grafana/dashboards/cache.json +++ b/engine/docker/dev-host/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY 
time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": 
"absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE 
MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName 
= 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, 
"barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n 
$__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - 
"instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n 
ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + 
"id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 
PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], 
+ "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": 
false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= 
$__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - 
}, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 
'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' 
ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 
'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-host/grafana/dashboards/futures.json b/engine/docker/dev-host/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-host/grafana/dashboards/futures.json +++ b/engine/docker/dev-host/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS 
count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + 
"label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND 
MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-host/grafana/dashboards/gasoline.json b/engine/docker/dev-host/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-host/grafana/dashboards/gasoline.json +++ 
b/engine/docker/dev-host/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { 
- "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ 
-330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", 
"steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": 
"^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], 
@@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": 
"4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} 
@@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 
+981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": 
"time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": 
"time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", 
"valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + 
"columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": 
"A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", 
"transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND 
TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n 
Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n 
$__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, 
count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY 
Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT 
ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY 
ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-host/grafana/dashboards/guard.json b/engine/docker/dev-host/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-host/grafana/dashboards/guard.json +++ b/engine/docker/dev-host/grafana/dashboards/guard.json 
@@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": 
"Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n 
AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND 
ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n 
ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND 
$__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { 
"exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + 
"columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= 
$__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": 
"WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n 
arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, 
"y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY 
Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE 
MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git 
a/engine/docker/dev-host/grafana/grafana.ini b/engine/docker/dev-host/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-host/grafana/grafana.ini +++ b/engine/docker/dev-host/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-host/otel-collector-server/config.yaml b/engine/docker/dev-host/otel-collector-server/config.yaml index 15f12073f0..c4414738d1 100644 --- a/engine/docker/dev-host/otel-collector-server/config.yaml +++ b/engine/docker/dev-host/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n 
sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", 
"format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], 
"type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND 
TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', 
Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n 
)\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND 
$__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - 
"destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - 
lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, 
"pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND 
ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": 
"WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as 
datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND 
$__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 
+116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY 
time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], 
$datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } 
+ }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND 
Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND 
$__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": 
"auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - 
"id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n 
Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - 
"legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY 
label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": 
"A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as 
value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": 
false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ 
-1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM 
otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": 
"Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND 
Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 
+184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - 
"refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY 
label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN 
smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY 
label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION 
BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 
PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS 
idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER 
BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, 
worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY 
time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n 
SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as 
collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, 
le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n 
AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND 
$__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + 
"pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": 
"time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": 
"Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": 
"groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND 
AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - 
toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - 
"definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as 
workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as 
time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": 
"4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": 
"{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", 
"showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + 
"axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as 
time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": 
"4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n 
$__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, 
"options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND 
ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n 
arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], 
$datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, 
- "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY 
ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini b/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini +++ b/engine/docker/dev-multidc-multinode/core/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml index a74179019e..64004c2dc7 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-server/config.yaml 
@@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-a + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml index a74179019e..7dae8a71d0 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-b + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml b/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml index a74179019e..a4fd830662 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + 
resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-c + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc-multinode/docker-compose.yml b/engine/docker/dev-multidc-multinode/docker-compose.yml index f5201df8f2..7fbbc3f111 100644 --- a/engine/docker/dev-multidc-multinode/docker-compose.yml +++ b/engine/docker/dev-multidc-multinode/docker-compose.yml @@ -137,6 +137,8 @@ services: networks: - rivet-network-dc-a - rivet-network-dc-a-to-core + ports: + - '4317:4317' otel-collector-client-dc-a: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -404,6 +406,8 @@ services: networks: - rivet-network-dc-b - rivet-network-dc-b-to-core + ports: + - '4317:4317' otel-collector-client-dc-b: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -667,6 +671,8 @@ services: networks: - rivet-network-dc-c - rivet-network-dc-c-to-core + ports: + - '4317:4317' otel-collector-client-dc-c: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/api.json b/engine/docker/dev-multidc/core/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n 
ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, 
"barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": 
"convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN 
array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, 
count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n 
AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / 
$metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": 
"time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) 
OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - 
"legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE 
'5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], 
$datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", 
"uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT 
Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": 
"single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n 
AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": 
"absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM 
otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM 
otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": 
"timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } 
- } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION 
BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" 
} }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND 
TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER 
(PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n 
AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": 
"", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT 
ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - 
"uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND 
Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 
+184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - 
"refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY 
time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND 
CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM 
(\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING 
AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 
PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n 
$__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else 
ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n 
$__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 
AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n 
time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - 
lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, 
count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": 
"WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND 
$__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND 
$__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + 
"pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": 
"time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": 
"Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": 
"groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND 
AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - 
toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - 
"definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as 
workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n 
ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + 
"queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - 
{{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ 
-428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, 
"barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n 
SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": 
"table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n 
ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": 
["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN 
array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when 
idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster 
ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY 
ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/grafana.ini b/engine/docker/dev-multidc/core/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multidc/core/grafana/grafana.ini +++ b/engine/docker/dev-multidc/core/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml index a74179019e..64004c2dc7 100644 --- a/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-a/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: 
+ attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-a + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml index a74179019e..7dae8a71d0 100644 --- a/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-b/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-b + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml b/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml index a74179019e..a4fd830662 100644 --- a/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml +++ b/engine/docker/dev-multidc/datacenters/dc-c/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: dc-c + action: upsert batch: timeout: 5s 
send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev-multidc/docker-compose.yml b/engine/docker/dev-multidc/docker-compose.yml index 96c1878833..e71dfa07f2 100644 --- a/engine/docker/dev-multidc/docker-compose.yml +++ b/engine/docker/dev-multidc/docker-compose.yml @@ -136,6 +136,8 @@ services: networks: - rivet-network-dc-a - rivet-network-dc-a-to-core + ports: + - '4317:4317' otel-collector-client-dc-a: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -289,6 +291,8 @@ services: networks: - rivet-network-dc-b - rivet-network-dc-b-to-core + ports: + - '4317:4317' otel-collector-client-dc-b: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped @@ -438,6 +442,8 @@ services: networks: - rivet-network-dc-c - rivet-network-dc-c-to-core + ports: + - '4317:4317' otel-collector-client-dc-c: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multinode/docker-compose.yml b/engine/docker/dev-multinode/docker-compose.yml index fb05f523db..7deffc9227 100644 --- a/engine/docker/dev-multinode/docker-compose.yml +++ b/engine/docker/dev-multinode/docker-compose.yml @@ -130,6 +130,8 @@ services: networks: - rivet-network - rivet-network-to-core + ports: + - '4317:4317' otel-collector-client: image: otel/opentelemetry-collector-contrib:latest restart: unless-stopped diff --git a/engine/docker/dev-multinode/grafana/dashboards/api.json b/engine/docker/dev-multinode/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/api.json +++ b/engine/docker/dev-multinode/grafana/dashboards/api.json @@ -120,10 +120,10 @@ 
"editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 
+144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" 
} ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] 
as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n 
time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 
'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", 
"range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", 
"refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, 
')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", 
"axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] 
as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR 
Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + 
"renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { 
"current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": 
"method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multinode/grafana/dashboards/cache.json b/engine/docker/dev-multinode/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/cache.json +++ b/engine/docker/dev-multinode/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", 
"placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND 
ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as 
value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + 
"axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY 
key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n 
ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + 
}, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY 
time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + 
"sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= 
$__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n 
sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": 
"time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY 
ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export 
Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multinode/grafana/dashboards/futures.json b/engine/docker/dev-multinode/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/futures.json +++ b/engine/docker/dev-multinode/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 
'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 
'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": 
[], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY 
time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], 
$cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY 
time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - 
lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + 
"rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND 
ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND 
ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND 
$__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n 
$__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n 
arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": 
"absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", 
"options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND 
AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - 
toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND 
ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n 
MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' 
ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN 
array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev-multinode/grafana/dashboards/guard.json b/engine/docker/dev-multinode/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/guard.json +++ b/engine/docker/dev-multinode/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - 
"legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", 
"showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": 
["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, 
"drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + 
"options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 
1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": 
"sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, 
"drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n 
ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT 
Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM 
otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - 
"definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY 
server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/grafana.ini b/engine/docker/dev-multinode/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev-multinode/grafana/grafana.ini +++ b/engine/docker/dev-multinode/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev-multinode/otel-collector-server/config.yaml b/engine/docker/dev-multinode/otel-collector-server/config.yaml index a74179019e..7b5fc80c72 100644 --- 
a/engine/docker/dev-multinode/otel-collector-server/config.yaml +++ b/engine/docker/dev-multinode/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/dev/grafana/dashboards/api.json b/engine/docker/dev/grafana/dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/dev/grafana/dashboards/api.json +++ b/engine/docker/dev/grafana/dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n 
arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], 
$datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 +366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n 
Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 
1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, 
[inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - 
lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n 
$__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN 
array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } 
}, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] 
LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": 
"ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": 
"ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - 
"from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev/grafana/dashboards/cache.json b/engine/docker/dev/grafana/dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/dev/grafana/dashboards/cache.json +++ b/engine/docker/dev/grafana/dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], 
$datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 
0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n 
SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as 
datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ 
"axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n 
total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": 
"grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": 
true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - 
"emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS 
smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, 
- "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { 
- "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - 
toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - 
"scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n 
Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as 
cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": 
"SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/dev/grafana/dashboards/futures.json b/engine/docker/dev/grafana/dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/dev/grafana/dashboards/futures.json +++ b/engine/docker/dev/grafana/dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as 
Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY 
ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT 
Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/dev/grafana/dashboards/gasoline.json b/engine/docker/dev/grafana/dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 
100644 --- a/engine/docker/dev/grafana/dashboards/gasoline.json +++ b/engine/docker/dev/grafana/dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": 
"convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], 
- "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": 
"^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": 
"time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } 
- ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 
1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - 
"targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + 
"emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - 
"conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": 
[ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", 
"emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", 
"options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n 
)\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" 
} ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND 
Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n 
SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket 
ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM 
(\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND 
$__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 
'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE 
ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev/grafana/dashboards/guard.json b/engine/docker/dev/grafana/dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- 
a/engine/docker/dev/grafana/dashboards/guard.json +++ b/engine/docker/dev/grafana/dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n 
AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND 
ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) 
as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND 
CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM 
otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + 
"calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": 
"groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND 
ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - 
"range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} 
@@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND 
TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT 
ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", 
"version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/dev/grafana/grafana.ini b/engine/docker/dev/grafana/grafana.ini index 1bd9bfe697..98c1df9724 100644 --- a/engine/docker/dev/grafana/grafana.ini +++ b/engine/docker/dev/grafana/grafana.ini @@ -8,7 +8,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/dev/otel-collector-server/config.yaml b/engine/docker/dev/otel-collector-server/config.yaml index a74179019e..7b5fc80c72 100644 --- a/engine/docker/dev/otel-collector-server/config.yaml +++ b/engine/docker/dev/otel-collector-server/config.yaml @@ -4,6 +4,14 @@ receivers: grpc: endpoint: 0.0.0.0:4317 processors: + resource: + attributes: + - key: rivet.project + value: dev + action: upsert + - key: rivet.datacenter + value: default + action: upsert batch: timeout: 5s send_batch_size: 10000 @@ -42,6 +50,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -49,6 +58,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse @@ -56,6 +66,7 @@ service: receivers: - otlp processors: + - resource - batch exporters: - clickhouse diff --git a/engine/docker/template/grafana-dashboards/api.json b/engine/docker/template/grafana-dashboards/api.json index 4ad455621b..a2aef94005 100644 --- a/engine/docker/template/grafana-dashboards/api.json +++ b/engine/docker/template/grafana-dashboards/api.json @@ -120,10 +120,10 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n concat(bounds[idx-1], 's - ', bounds[idx], 's') as label,\n sum(BucketCounts[idx]) AS count\nFROM 
otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, label\nORDER BY Time", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -132,8 +132,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "label", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -144,7 +144,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\label" + "targetField": "Time\\bucket" } ], "fields": {} @@ -169,6 +169,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -244,28 +245,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": 
"{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_pending'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_pending'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Requests Pending", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -364,10 
+366,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING sum(Count) > 0\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(count_diff > 0 AND sum_diff >= 0, sum_diff / count_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n sum_val,\n count_val,\n sum_val - lagInFrame(sum_val, 1, sum_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as sum_diff,\n count_val - lagInFrame(count_val, 1, count_val) OVER (PARTITION BY method, path, datacenter ORDER BY time) as count_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Sum) as sum_val,\n max(Count) as count_val\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - 
toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> ''\n)\nORDER BY label", "refId": "A" } ], @@ -491,10 +493,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.95)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.95)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n 
arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -618,10 +620,10 @@ "format": 1, "legendFormat": "{{datacenter_id}} {{method}} {{path}}", "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Sum) / sum(Count) as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nHAVING value >= (\n SELECT quantile(0.99)(avg_value)\n FROM (\n SELECT sum(Sum) / sum(Count) as avg_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\n GROUP BY \n $__timeInterval(TimeUnix),\n ResourceAttributes['datacenter_id'],\n Attributes['method'],\n Attributes['path']\n )\n)\nORDER BY time", + "rawSql": 
"WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n quantileInterpolatedWeighted(0.99)(bound_value, count_value) as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(ResourceAttributes['rivet.datacenter'], ' ', Attributes['method'], ' ', Attributes['path']) as label,\n arrayJoin(arrayEnumerate(arrayConcat([0], ExplicitBounds, [inf]))) as idx,\n arrayConcat([0], ExplicitBounds, [inf])[idx] as bound_value,\n BucketCounts[idx] as count_value\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n )\n GROUP BY time, label\n )\n\nORDER BY label\n", "refId": "A" } ], @@ -667,6 +669,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -744,36 +747,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path']\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_total'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], 
$method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path) as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_total'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": 
"time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -795,6 +790,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -872,36 +868,28 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, - "pluginVersion": "4.11.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['status'], ' (',\n Attributes['error_code'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', status, ' (', error_code, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION 
BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '4%'\n AND Attributes['error_code'] NOT IN ('API_CANCELLED', 'CAPTCHA_CAPTCHA_REQUIRED')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (4xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -923,6 +911,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1000,29 +989,29 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "legendFormat": "{{datacenter_id}} {{method}} {{path}}: {{status}} ({{error_code}})", + "instant": false, "meta": {}, "pluginVersion": "4.11.1", "queryType": "table", "range": true, - "rawSql": 
"SELECT\n $__timeInterval(TimeUnix) as time,\n concat(\n ResourceAttributes['datacenter_id'], ' ',\n Attributes['method'], ' ',\n Attributes['path'], ': ',\n Attributes['error_code'], ' (',\n Attributes['status'], ')'\n ) as label,\n sum(Value) / $metric_interval as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n concat(datacenter, ' ', method, ' ', path, ': ', error_code, ' (', status, ')') as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n method,\n path,\n status,\n error_code,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY method, path, status, error_code, datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['method'] as method,\n Attributes['path'] as path,\n Attributes['status'] as status,\n Attributes['error_code'] as error_code,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_api_request_errors'\n AND Attributes['status'] LIKE '5%'\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] 
IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, method, path, status, error_code, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY time", "refId": "A" } ], "title": "Error Rate (5xx)", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", - "options": {} + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } } ], "type": "timeseries" @@ -1043,6 +1032,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1118,35 +1109,29 @@ }, "editorMode": "code", "editorType": "sql", - "format": 0, + "format": 1, "legendFormat": "{{method}} {{path}}: {{status}} {{error_code}}", "meta": {}, - "pluginVersion": "4.11.1", - "queryType": "timeseries", + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n -- Attributes['method'] as method,\n -- Attributes['path'] as path,\n Attributes['status'] as status,\n -- Attributes['error_code'] as error_code,\n sum(Count) / 30 as value\nFROM otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND $__conditionalAll(Attributes['path'], $path)\n AND $__conditionalAll(Attributes['method'], $method)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, status\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER 
(PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY label ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['status'], ' ', Attributes['error_code']) as label,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_api_request_duration'\n AND (Attributes['status'] = '200 OK' OR Attributes['status'] LIKE '5%')\n AND Attributes['path'] IN array($path)\n AND Attributes['method'] IN array($method)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, label\n )\n)\nORDER BY label\n", "refId": "A" } ], "title": "200 vs 5xx (4xx excluded)", "transformations": [ { - "id": "organize", + "id": "prepareTimeSeries", "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "time": 0, - "value 200 OK": 2, - "value 500 Internal Server Error": 1 - }, - "renameByName": { - "200 OK": "200", - "500 Internal Server Error": "500", - "time": "time", - "value 200 OK": "200", - "value 500 Internal Server Error": "500" - } + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1161,49 +1146,60 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY 
datacenter_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Datacenter ID", + "label": "Project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_errors' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "Datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", + "definition": 
"SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "includeAll": true, "label": "Path", "multi": true, "name": "path", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) ORDER BY path", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['path'] as path FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY path", "refresh": 1, "regex": "", "sort": 1, @@ -1211,44 +1207,28 @@ }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", + "definition": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "includeAll": true, "label": "Method", "multi": true, "name": "method", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request' AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + 
"query": "SELECT DISTINCT Attributes['method'] as method FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_api_request_total' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) AND $__conditionalAll(Attributes['path'], $path) ORDER BY method", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/template/grafana-dashboards/cache.json b/engine/docker/template/grafana-dashboards/cache.json index 222196172e..385e42ff48 100644 --- a/engine/docker/template/grafana-dashboards/cache.json +++ b/engine/docker/template/grafana-dashboards/cache.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, + "graphTooltip": 1, + "id": 8, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -66,7 +66,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -79,29 +78,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -116,36 +116,27 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": 
"table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n CASE\n WHEN ResourceAttributes['datacenter_id'] != '' AND ResourceAttributes['server_id'] != '' THEN concat(ResourceAttributes['datacenter_id'], ' - ', ResourceAttributes['server_id'])\n ELSE 'Route Cache Size'\n END as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Route Cache Size", + "title": "Cache Request Rate", "transformations": [ { - "id": 
"groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "label", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\label" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -167,7 +158,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -196,7 +187,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -209,29 +199,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, + "w": 12, + "x": 12, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -246,13 +237,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS 
collector_rate_s,\n 10 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_request_errors'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Rate Limiters", + "title": "Cache Request Error Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -271,7 +279,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -300,7 +308,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -313,29 +320,30 @@ "value": 80 } ] - } + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 8 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": 
["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -350,13 +358,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_miss_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= 
$__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "In-Flight Counters", + "title": "Cache Miss Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -375,7 +400,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -404,6 +429,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -417,29 +443,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -454,13 +481,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND 
$__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND miss_diff >= 0, miss_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n miss_val - lagInFrame(miss_val, 1, miss_val) OVER (PARTITION BY key ORDER BY time) as miss_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_miss_total') as miss_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_miss_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active TCP Connections", + "title": "Cache Miss Rate (% of total)", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -479,7 +523,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -508,7 +552,6 @@ } }, "mappings": 
[], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -522,112 +565,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 8 + "w": 12, + "x": 0, + "y": 16 }, "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "TCP Connection Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 1, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": 
false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -641,36 +602,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_value_empty_total'\n AND Attributes['key'] 
IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "TCP Connection Duration", + "title": "Cache Empty Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -688,7 +644,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,6 +673,7 @@ } }, "mappings": [], + "max": 1, "min": 0, "thresholds": { "mode": "absolute", @@ -730,29 +687,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 16 }, - "id": 7, + "id": 6, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -767,13 +725,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n 
$__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(total_diff > 0 AND empty_diff >= 0, empty_diff / total_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n empty_val - lagInFrame(empty_val, 1, empty_val) OVER (PARTITION BY key ORDER BY time) as empty_diff,\n total_val - lagInFrame(total_val, 1, total_val) OVER (PARTITION BY key ORDER BY time) as total_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n sumIf(Value, MetricName = 'rivet_cache_value_empty_total') as empty_val,\n sumIf(Value, MetricName = 'rivet_cache_value_total') as total_val\n FROM otel.otel_metrics_sum\n WHERE MetricName IN ('rivet_cache_value_empty_total', 'rivet_cache_value_total')\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> ''\n)\nORDER BY label", "refId": "A" } ], - "title": "Active Proxy Requests", + "title": "Cache Empty Rate (% of total)", + "transformations": [ + { + "id": 
"prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -792,7 +767,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -821,7 +796,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -835,112 +809,30 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 16 + "w": 12, + "x": 0, + "y": 24 }, - "id": 8, + "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": ["mean"], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.2", - "targets": [ - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "editorMode": "code", - "editorType": "sql", - "format": 1, - "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", - "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", - "refId": "A" - } - ], - "title": "Proxy Request Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - 
"uid": "clickhouse" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 9, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -954,36 +846,31 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n 
value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_request_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Proxy Request Duration", + "title": "Cache Purge Request Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" }, { "datasource": { @@ -992,59 +879,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 8, - "x": 0, + "w": 12, + "x": 12, "y": 24 }, - "id": 6, + "id": 8, "options": { - "calculate": false, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": ["mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.5.2", @@ -1058,40 +967,35 @@ "editorType": "sql", "format": 1, "instant": false, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 
AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n key as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n key,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY key ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY key ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['key'] as key,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_cache_purge_value_total'\n AND Attributes['key'] IN array($key)\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, key\n )\n )\n WHERE key <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], - "title": "Resolve Route Duration", + "title": "Cache Purge Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "le", - "emptyValue": "zero", - "rowField": "Time", - "valueField": "count" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "Time\\le" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], - "type": "heatmap" + "type": "timeseries" } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1099,16 +1003,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + 
"uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1116,16 +1023,19 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Dataceter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM 
otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" @@ -1133,31 +1043,22 @@ { "current": { "text": "All", - "value": "$__all" + "value": ["$__all"] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "includeAll": true, - "label": "Server ID", + "label": "key", "multi": true, - "name": "server_id", + "name": "key", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT Attributes['key'] FROM otel.otel_metrics_sum WHERE MetricName = 'rivet_cache_request_total' AND ServiceName = 'rivet' AND ResourceAttributes['rivet.datacenter'] IN array($datacenter) ORDER BY Attributes['key']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1166,9 +1067,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", - "uid": "cen785ige8fswd2", + "timezone": "", + "title": "Cache", + "uid": "c35233ed-b698-4838-9426-18e1586017f1", "version": 1, "weekStart": "" } diff --git a/engine/docker/template/grafana-dashboards/futures.json 
b/engine/docker/template/grafana-dashboards/futures.json index 34d0c27571..03880e4bef 100644 --- a/engine/docker/template/grafana-dashboards/futures.json +++ b/engine/docker/template/grafana-dashboards/futures.json @@ -18,6 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 3, "links": [], "panels": [ { @@ -100,8 +101,11 @@ "editorMode": "code", "editorType": "sql", "format": 1, + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_instrumented_future_duration'\n AND $__conditionalAll(Attributes['name'], $name)\n AND $__conditionalAll(Attributes['location'], $location)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_instrumented_future_duration'\n -- AND ResourceAttributes['rivet.project'] IN array($project)\n -- AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['name'] IN array($name)\n AND Attributes['location'] IN array($location)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n 
)\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -110,7 +114,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -122,7 +126,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -138,6 +142,39 @@ "tags": [], "templating": { "list": [ + { + "current": { + "text": ["All"], + "value": ["$__all"] + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "description": "", + "includeAll": true, + "label": "project", + "multi": true, + "name": "project", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "includeAll": true, + "label": "datacenter", + "multi": true, + "name": "datacenter", + "options": [], + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", + "refresh": 1, + "regex": "", + "type": "query" + }, { "current": { "text": ["All"], @@ -147,17 +184,13 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", + "definition": 
"SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "includeAll": true, - "label": "Name", + "label": "name", "multi": true, "name": "name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['name'] as name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['name'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['name']", "refresh": 1, "regex": "", "type": "query" @@ -171,32 +204,16 @@ "type": "grafana-clickhouse-datasource", "uid": "clickhouse" }, - "definition": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", + "definition": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "includeAll": true, - "label": "Location", + "label": "location", "multi": true, "name": "location", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['location'] as location FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_instrumented_future_duration' ORDER BY location", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT Attributes['location'] FROM otel.otel_metrics_histogram WHERE ServiceName = 'rivet' AND MetricName = 'rivet_instrumented_future_duration' AND 
ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['location']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -207,6 +224,7 @@ "timepicker": {}, "timezone": "browser", "title": "Futures", - "version": 0, + "uid": "ef353ektqu4g0e", + "version": 1, "weekStart": "" } diff --git a/engine/docker/template/grafana-dashboards/gasoline.json b/engine/docker/template/grafana-dashboards/gasoline.json index 6a2fc3a3d6..2b0bffca01 100644 --- a/engine/docker/template/grafana-dashboards/gasoline.json +++ b/engine/docker/template/grafana-dashboards/gasoline.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 3, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -117,34 +118,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n 
Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_active'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Running Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -247,34 +240,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.2", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_sleeping'\n AND 
ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Sleeping Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -330,7 +315,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -376,34 +362,26 @@ "instant": false, "legendFormat": "{{workflow_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] 
IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflows", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -460,7 +438,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -506,34 +485,26 @@ "instant": false, "legendFormat": "({{workflow_name}}) {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n Attributes['error_code'] as error_code,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_workflow_dead'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['workflow_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 
'rivet_gasoline_workflow_dead'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Dead Workflow Errors", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\workflow_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -589,7 +560,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -635,34 +607,26 @@ "instant": false, "legendFormat": "__auto", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_worker_last_ping'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n count(*) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 
'rivet_gasoline_worker_last_ping'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label, ResourceAttributes['rivet.datacenter']\n)\nORDER BY label", "refId": "A" } ], "title": "Active Workers", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "datacenter_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\datacenter_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -718,7 +682,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -764,34 +729,26 @@ "instant": false, "legendFormat": "{{signal_name}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as signal_name,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, signal_name\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['signal_name'] as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_signal_pending'\n AND 
ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Pending Signals", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "signal_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\signal_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -888,9 +845,9 @@ "format": 1, "hide": false, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_recv_lag'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 
'rivet_gasoline_signal_recv_lag'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -899,8 +856,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -911,7 +868,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1001,9 +958,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY\n Time, le\nORDER BY\n Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n 
time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_pull_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1012,8 +969,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1024,7 +981,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1084,7 +1041,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1126,34 +1084,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n 
$__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1210,7 +1165,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,34 +1208,31 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "instant": false, "legendFormat": "{{worker_instance_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['worker_instance_id'] as worker_instance_id,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, worker_instance_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as 
time,\n Attributes['worker_instance_id'] as label,\n max(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_last_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Last Pull Workflows History Duration", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "worker_instance_id", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\worker_instance_id" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1366,9 +1319,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n 
case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1377,7 +1330,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1389,7 +1342,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1479,9 +1432,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as 
time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_pull_workflows_history_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1490,7 +1443,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1502,7 +1455,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1605,9 +1558,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS 
count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_activity_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1616,8 +1569,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1628,7 +1581,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1686,7 +1639,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1733,34 +1687,26 @@ "format": 1, "legendFormat": "{{activity_name}}: {{error_code}}", "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['activity_name'] as activity_name,\n Attributes['error_code'] as error_code,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY time, activity_name, error_code\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n\tSELECT\n $__timeInterval(TimeUnix) as time,\n concat(Attributes['activity_name'], ' (', Attributes['error'], ')') as label,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_gasoline_activity_errors'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Activity Error Rate", "transformations": [ { - "id": "groupingToMatrix", + "id": "prepareTimeSeries", "options": { - "columnField": "activity_name", - "emptyValue": "zero", - "rowField": "time", - "valueField": "value" + "format": "multi" } }, { - "id": "convertFieldType", + "id": "renameByRegex", "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "time\\activity_name" - } - ], - "fields": {} + "regex": "^value\\s(.*)$", + "renamePattern": "$1" } } ], @@ -1847,18 +1793,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND 
$__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Loop Upsert Duration", + "title": "Loop Iteration Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -1870,7 +1816,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1928,7 +1874,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1971,9 +1918,14 @@ "uid": "clickhouse" }, "editorMode": "code", + "editorType": "sql", + "format": 1, "legendFormat": "{{workflow_name}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(Count) / $__interval_ms * 1000 as value\nFROM 
otel.otel_metrics_histogram\nWHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND Attributes['workflow_name'] LIKE '${workflow_name:regex}'\n AND ResourceAttributes['cluster_id'] LIKE '${cluster_id:regex}'\n AND ResourceAttributes['datacenter_id'] LIKE '${datacenter_id:regex}'\n AND $__timeFilter(TimeUnix)\nGROUP BY time, workflow_name\nORDER BY time", + "rawSql": "WITH\n 30 as collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n workflow_name as label,\n greatest(0, total_count - lagInFrame(total_count, 1, 0) OVER (PARTITION BY workflow_name ORDER BY time)) / $__interval_ms * 1000 as value\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n Attributes['workflow_name'] as workflow_name,\n sum(arraySum(BucketCounts)) as total_count\n FROM otel.otel_metrics_histogram\n WHERE MetricName = 'rivet_gasoline_loop_iteration_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY time, workflow_name\n )\n)\nORDER BY label", "refId": "A" } ], @@ -1982,8 +1934,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "workflow_name", - "emptyValue": "zero", + "columnField": "label", + "emptyValue": "null", "rowField": "time", "valueField": "value" } @@ -1994,7 +1946,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "time\\workflow_name" + "targetField": "time\\label" } ], "fields": {} @@ -2084,9 +2036,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": 
"WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_message_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2095,7 +2047,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2107,7 +2059,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2197,9 +2149,9 @@ "editorType": "sql", "format": 1, 
"meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_signal_send_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2208,8 +2160,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -2220,7 +2172,7 @@ "conversions": [ { "destinationType": "time", - 
"targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2310,9 +2262,9 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_find_workflows_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -2321,7 +2273,7 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", 
"rowField": "Time", "valueField": "count" @@ -2333,7 +2285,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2423,18 +2375,18 @@ "editorType": "sql", "format": 1, "meta": {}, - "pluginVersion": "4.10.1", + "pluginVersion": "4.11.2", "queryType": "table", - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND $__conditionalAll(Attributes['workflow_name'], $workflow_name)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_gasoline_workflow_dispatch_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND Attributes['workflow_name'] IN array($workflow_name)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], - "title": "Sub 
Workflow Dispatch Duration", + "title": "Workflow Dispatch Duration", "transformations": [ { "id": "groupingToMatrix", "options": { - "columnField": "le", + "columnField": "bucket", "emptyValue": "zero", "rowField": "Time", "valueField": "count" @@ -2446,7 +2398,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -2464,94 +2416,56 @@ "list": [ { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Cluster ID", + "label": "project", "multi": true, - "name": "cluster_id", + "name": "project", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY cluster_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", + "definition": "SELECT DISTINCT 
ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Datacenter ID", + "label": "datacenter", "multi": true, - "name": "datacenter_id", + "name": "datacenter", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_gasoline_worker_last_ping' ORDER BY datacenter_id", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", - "sort": 1, "type": "query" }, { "current": { - "text": ["All"], + "text": "All", "value": ["$__all"] }, - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "clickhouse" - }, - "definition": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", + "definition": "SELECT DISTINCT Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "includeAll": true, - "label": "Workflow Name", + "label": "workflow name", "multi": true, "name": "workflow_name", "options": [], - "query": { - "qryType": 1, - "rawSql": "SELECT DISTINCT Attributes['workflow_name'] as workflow_name FROM otel.otel_metrics_histogram WHERE MetricName = 'rivet_gasoline_signal_recv_lag' ORDER BY workflow_name", - "refId": "ClickHouseVariableQueryEditor-VariableQuery" - }, + "query": "SELECT DISTINCT 
Attributes['workflow_name'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND MetricName = 'rivet_gasoline_workflow_total' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY Attributes['workflow_name']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, "time": { - "from": "now-5m", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/template/grafana-dashboards/guard.json b/engine/docker/template/grafana-dashboards/guard.json index 722321a813..1fb76de4bb 100644 --- a/engine/docker/template/grafana-dashboards/guard.json +++ b/engine/docker/template/grafana-dashboards/guard.json @@ -17,8 +17,8 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 115, + "graphTooltip": 1, + "id": 7, "links": [], "panels": [ { @@ -37,7 +37,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -89,10 +89,12 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 1, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -116,13 +118,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_route_cache_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n 
AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_route_cache_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Route Cache Size", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -141,7 +161,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -193,10 +213,12 @@ "x": 8, "y": 0 }, - "id": 11, + "id": 2, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -220,13 +242,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 
'rivet_guard_rate_limiter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_rate_limiter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Rate Limiters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -245,7 +285,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -297,10 +337,12 @@ "x": 16, "y": 0 }, - "id": 12, + "id": 3, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -324,13 +366,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n 
ResourceAttributes['server_id'] as server_id,\n sum(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH 1 AS smoothness\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n sum(Value) as value\n FROM otel.otel_metrics_gauge\n WHERE MetricName = 'rivet_guard_in_flight_counter_count'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND $__timeFilter(TimeUnix)\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "In-Flight Counters", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -349,7 +409,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -401,10 +461,12 @@ "x": 0, "y": 8 }, - "id": 2, + "id": 4, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -428,13 +490,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - 
"rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active TCP Connections", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -453,7 +533,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -482,7 +562,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -496,7 +575,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ 
-509,16 +588,18 @@ "id": 5, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -533,13 +614,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_tcp_connection'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_tcp_connection_total'\n AND ResourceAttributes['rivet.project'] IN 
array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "TCP Connection Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -568,9 +666,15 @@ "x": 16, "y": 8 }, - "id": 1, + "id": 6, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -600,6 +704,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -614,9 +720,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n 
$__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_tcp_connection_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -625,8 +732,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -637,7 +744,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -662,7 +769,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMax": 5, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -717,7 +824,9 @@ "id": 7, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, @@ -741,13 +850,31 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "legendFormat": "{{datacenter}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n avg(Value) as value\nFROM otel.otel_metrics_gauge\nWHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND 
$__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 4 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as label,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_pending'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, label\n)\nORDER BY label", "refId": "A" } ], "title": "Active Proxy Requests", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -766,7 +893,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -795,7 +922,6 @@ } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -809,7 +935,7 @@ } ] }, - "unit": "req/s" + "unit": "reqps" }, "overrides": [] }, @@ -822,16 +948,18 @@ "id": 8, "options": { "legend": { - "calcs": ["lastNotNull"], + "calcs": [ + "mean" + ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last *", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, - "mode": 
"multi", + "mode": "single", "sort": "none" } }, @@ -846,13 +974,30 @@ "editorType": "sql", "format": 1, "instant": false, - "legendFormat": "{{datacenter_id}} - {{server_id}}", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", "range": true, - "rawSql": "SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['datacenter_id'] as datacenter_id,\n ResourceAttributes['server_id'] as server_id,\n sum(Value) / $__interval_ms * 1000 as value\nFROM otel.otel_metrics_sum\nWHERE MetricName = 'rivet_guard_proxy_request'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY time, datacenter_id, server_id\nORDER BY time", + "rawSql": "WITH\n 30 AS collector_rate_s,\n 5 AS smoothness\n\nSELECT\n time,\n label,\n avg(value) OVER (PARTITION BY label ORDER BY time ROWS BETWEEN smoothness - 1 PRECEDING AND CURRENT ROW) as value\nFROM (\n SELECT\n time,\n datacenter as label,\n if(value_diff >= 0, value_diff / time_diff, 0) as value\n FROM (\n SELECT\n time,\n datacenter,\n value,\n value - lagInFrame(value, 1, value) OVER (PARTITION BY datacenter ORDER BY time) as value_diff,\n toUnixTimestamp(time) - lagInFrame(toUnixTimestamp(time), 1, toUnixTimestamp(time)) OVER (PARTITION BY datacenter ORDER BY time) as time_diff\n FROM (\n SELECT\n $__timeInterval(TimeUnix) as time,\n ResourceAttributes['rivet.datacenter'] as datacenter,\n max(Value) as value\n FROM otel.otel_metrics_sum\n WHERE MetricName = 'rivet_guard_proxy_request_total'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000) * smoothness)\n AND TimeUnix <= $__toTime_ms()\n GROUP BY time, datacenter\n )\n )\n 
WHERE datacenter <> '' AND time_diff > 0\n)\nORDER BY label", "refId": "A" } ], "title": "Proxy Request Rate", + "transformations": [ + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "^value\\s(.*)$", + "renamePattern": "$1" + } + } + ], "type": "timeseries" }, { @@ -882,8 +1027,14 @@ "y": 16 }, "id": 9, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -913,6 +1064,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -927,9 +1080,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_proxy_request_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 
'rivet_guard_proxy_request_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -938,8 +1092,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -950,7 +1104,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -981,13 +1135,19 @@ }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, "y": 24 }, - "id": 6, + "id": 10, + "interval": "15s", "options": { "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, "cellGap": 0, "color": { "exponent": 0.5, @@ -1017,6 +1177,8 @@ }, "yAxis": { "axisPlacement": "left", + "max": "60", + "min": 0, "reverse": false, "unit": "s" } @@ -1031,9 +1193,10 @@ "editorMode": "code", "editorType": "sql", "format": 1, - "instant": false, - "range": true, - "rawSql": "WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n ExplicitBounds AS bounds\nSELECT\n time_bucket as Time,\n bounds[idx+1] AS le,\n sum(BucketCounts[idx]) AS count\nFROM otel.otel_metrics_histogram\nWHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND $__conditionalAll(ResourceAttributes['cluster_id'], $cluster_id)\n AND $__conditionalAll(ResourceAttributes['datacenter_id'], $datacenter_id)\n AND $__conditionalAll(ResourceAttributes['server_id'], $server_id)\n AND $__timeFilter(TimeUnix)\nGROUP BY Time, le\nORDER BY Time, le", + "meta": {}, + "pluginVersion": "4.11.2", + "queryType": "table", + "rawSql": "WITH\n 30 as 
collector_rate_s\n\nSELECT Time, bucket, count\nFROM (\n SELECT\n Time,\n bucket,\n greatest(0, count - lagInFrame(count, 1, 0) OVER (PARTITION BY bucket ORDER BY Time)) AS count\n FROM (\n WITH\n $__timeInterval(TimeUnix) as time_bucket,\n arrayJoin(arrayEnumerate(BucketCounts)) AS idx,\n case when idx = length(BucketCounts) then inf else ExplicitBounds[idx] end as bucket\n SELECT\n time_bucket as Time,\n bucket,\n sum(BucketCounts[idx]) AS count\n FROM otel.otel_metrics_histogram\n WHERE\n MetricName = 'rivet_guard_resolve_route_duration'\n AND ResourceAttributes['rivet.project'] IN array($project)\n AND ResourceAttributes['rivet.datacenter'] IN array($datacenter)\n AND TimeUnix >= $__fromTime_ms() - toIntervalMillisecond(greatest(collector_rate_s * 1000, $__interval_s * 1000))\n AND TimeUnix <= $__toTime_ms()\n AND AggregationTemporality = 2\n GROUP BY Time, bucket\n )\n)\nWHERE $__timeFilter(Time)", "refId": "A" } ], @@ -1042,8 +1205,8 @@ { "id": "groupingToMatrix", "options": { - "columnField": "le", - "emptyValue": "zero", + "columnField": "bucket", + "emptyValue": "null", "rowField": "Time", "valueField": "count" } @@ -1054,7 +1217,7 @@ "conversions": [ { "destinationType": "time", - "targetField": "Time\\le" + "targetField": "Time\\bucket" } ], "fields": {} @@ -1065,7 +1228,7 @@ } ], "preload": false, - "refresh": "", + "refresh": "30s", "schemaVersion": 40, "tags": [], "templating": { @@ -1073,33 +1236,21 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] }, - "definition": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY cluster_id", - "description": "", - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "cluster_id", - "options": [], - "query": "SELECT DISTINCT ResourceAttributes['cluster_id'] as cluster_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' 
ORDER BY cluster_id", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "includeAll": true, - "label": "Dataceter ID", + "label": "project", "multi": true, - "name": "datacenter_id", + "name": "project", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['datacenter_id'] as datacenter_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY datacenter_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.project'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' ORDER BY ResourceAttributes['rivet.project']", "refresh": 1, "regex": "", "type": "query" @@ -1107,31 +1258,24 @@ { "current": { "text": "All", - "value": "$__all" + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-clickhouse-datasource", + "uid": "clickhouse" }, - "definition": "SELECT DISTINCT ResourceAttributes['server_id'] as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", - "description": "", + "definition": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "includeAll": true, - "label": "Server ID", + "label": "datacenter", "multi": true, - "name": "server_id", + "name": "datacenter", "options": [], - "query": "SELECT DISTINCT ResourceAttributes['server_id'] 
as server_id FROM otel.otel_metrics_gauge WHERE MetricName = 'rivet_guard_tcp_connection_pending' ORDER BY server_id", + "query": "SELECT DISTINCT ResourceAttributes['rivet.datacenter'] FROM otel.otel_metrics_gauge WHERE ServiceName = 'rivet' AND ResourceAttributes['rivet.project'] IN array($project) ORDER BY ResourceAttributes['rivet.datacenter']", "refresh": 1, "regex": "", "type": "query" - }, - { - "current": { - "text": "30", - "value": "30" - }, - "hide": 2, - "label": "Metric Export Interval (seconds)", - "name": "metric_interval", - "query": "30", - "skipUrlSync": true, - "type": "constant" } ] }, @@ -1140,9 +1284,9 @@ "to": "now" }, "timepicker": {}, - "timezone": "browser", - "title": "Rivet Guard", + "timezone": "", + "title": "Guard", "uid": "cen785ige8fswd", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/engine/docker/template/src/docker-compose.ts b/engine/docker/template/src/docker-compose.ts index 7507ec74bd..5daa3b2182 100644 --- a/engine/docker/template/src/docker-compose.ts +++ b/engine/docker/template/src/docker-compose.ts @@ -238,6 +238,7 @@ export function generateDockerCompose(context: TemplateContext) { clickhouse: { condition: "service_healthy" }, }, networks: [dcNetworkName, dcToCoreNetworkName], + ports: ["4317:4317"], }; services[otelCollectorClientServiceName] = { diff --git a/engine/docker/template/src/services/core/grafana.ts b/engine/docker/template/src/services/core/grafana.ts index d5e3ad2fa0..813e44ffba 100644 --- a/engine/docker/template/src/services/core/grafana.ts +++ b/engine/docker/template/src/services/core/grafana.ts @@ -17,7 +17,7 @@ admin_password = admin [auth.anonymous] enabled = true -org_role = Viewer +org_role = Admin [dashboards] default_home_dashboard_path = /var/lib/grafana/dashboards/api.json diff --git a/engine/docker/template/src/services/edge/otel-collector-server.ts b/engine/docker/template/src/services/edge/otel-collector-server.ts index 83d2afc1b6..5b57dc4e7c 100644 --- 
a/engine/docker/template/src/services/edge/otel-collector-server.ts +++ b/engine/docker/template/src/services/edge/otel-collector-server.ts @@ -3,7 +3,7 @@ import type { TemplateContext } from "../../context"; export function generateDatacenterOtelCollectorServer( context: TemplateContext, - dcId: string, + dcName: string, ) { const clickhouseHost = context.config.networkMode === "host" ? "127.0.0.1" : "clickhouse"; @@ -18,6 +18,20 @@ export function generateDatacenterOtelCollectorServer( }, }, processors: { + resource: { + attributes: [ + { + key: "rivet.project", + value: "dev", + action: "upsert", + }, + { + key: "rivet.datacenter", + value: dcName, + action: "upsert", + }, + ], + }, batch: { timeout: "5s", send_batch_size: 10000, @@ -65,17 +79,17 @@ export function generateDatacenterOtelCollectorServer( pipelines: { logs: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, traces: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, metrics: { receivers: ["otlp"], - processors: ["batch"], + processors: ["resource", "batch"], exporters: ["clickhouse"], }, }, @@ -86,7 +100,7 @@ export function generateDatacenterOtelCollectorServer( context.writeDatacenterServiceFile( "otel-collector-server", - dcId, + dcName, "config.yaml", yamlContent, ); diff --git a/engine/packages/api-builder/src/middleware.rs b/engine/packages/api-builder/src/middleware.rs index ba8f4e16fe..67168cd2cc 100644 --- a/engine/packages/api-builder/src/middleware.rs +++ b/engine/packages/api-builder/src/middleware.rs @@ -195,9 +195,11 @@ pub async fn http_logging_middleware( ); let error_code: String = if status.is_success() { - "".into() + String::new() + } else if let Some(err) = &error { + format!("{}.{}", err.group, err.code) } else { - status.to_string() + String::new() }; metrics::API_REQUEST_DURATION.record( duration, diff --git 
a/engine/packages/gasoline/src/ctx/workflow.rs b/engine/packages/gasoline/src/ctx/workflow.rs index 8987624f31..086104c34c 100644 --- a/engine/packages/gasoline/src/ctx/workflow.rs +++ b/engine/packages/gasoline/src/ctx/workflow.rs @@ -223,7 +223,7 @@ impl WorkflowCtx { 1, &[ KeyValue::new("workflow_name", self.name.clone()), - KeyValue::new("error_code", err.to_string()), + KeyValue::new("error", err.to_string()), ], ); } diff --git a/engine/packages/gasoline/src/metrics.rs b/engine/packages/gasoline/src/metrics.rs index 206089e619..71afdd524b 100644 --- a/engine/packages/gasoline/src/metrics.rs +++ b/engine/packages/gasoline/src/metrics.rs @@ -51,7 +51,7 @@ lazy_static::lazy_static! { pub static ref WORKFLOW_ACTIVE: Gauge = METER.u64_gauge("rivet_gasoline_workflow_active") .with_description("Total active workflows.") .build(); - /// Expected attributes: "workflow_name", "error_code" + /// Expected attributes: "workflow_name", "error" pub static ref WORKFLOW_DEAD: Gauge = METER.u64_gauge("rivet_gasoline_workflow_dead") .with_description("Total dead workflows.") .build(); @@ -59,7 +59,7 @@ lazy_static::lazy_static! 
{ pub static ref WORKFLOW_SLEEPING: Gauge = METER.u64_gauge("rivet_gasoline_workflow_sleeping") .with_description("Total sleeping workflows.") .build(); - /// Expected attributes: "workflow_name", "error_code" + /// Expected attributes: "workflow_name", "error" pub static ref WORKFLOW_ERRORS: Counter = METER.u64_counter("rivet_gasoline_workflow_errors") .with_description("All errors made in a workflow.") .build(); diff --git a/engine/packages/metrics/src/buckets.rs b/engine/packages/metrics/src/buckets.rs index 56bb53c950..b719898714 100644 --- a/engine/packages/metrics/src/buckets.rs +++ b/engine/packages/metrics/src/buckets.rs @@ -1,22 +1,22 @@ pub const BUCKETS: &[f64] = &[ - // Added + // For otel + 0.0, // Added 0.001, 0.0025, // Copied from https://docs.rs/prometheus/latest/src/prometheus/histogram.rs.html#25-27 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, // Added 25.0, 50.0, 100.0, 250.0, 500.0, ]; -pub const PROVISION_BUCKETS: &[f64] = &[ - 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 35.0, 50.0, 75.0, 100.0, 125.0, 250.0, 500.0, 1000.0, -]; - pub const MICRO_BUCKETS: &[f64] = &[ - 0.0001, 0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.5, 1.0, 2.5, - 5.0, 10.0, 25.0, 50.0, + // For otel + 0.0, 0.0001, 0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.5, 1.0, + 2.5, 5.0, 10.0, 25.0, 50.0, ]; // Calculated based on the LogHistogram configuration in `packages/common/runtime/src/lib.rs` pub const TASK_POLL_BUCKETS: &[f64] = &[ + // For otel + 0.0, 0.00002, 0.000032768, 0.000065536, diff --git a/engine/packages/metrics/src/providers.rs b/engine/packages/metrics/src/providers.rs index 72771180ee..d54da249dc 100644 --- a/engine/packages/metrics/src/providers.rs +++ b/engine/packages/metrics/src/providers.rs @@ -119,7 +119,7 @@ fn init_tracer_provider() -> SdkTracerProvider { fn init_meter_provider() -> SdkMeterProvider { let exporter = opentelemetry_otlp::MetricExporter::builder() .with_tonic() - 
.with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Cumulative) .with_protocol(opentelemetry_otlp::Protocol::Grpc) .with_endpoint(otel_grpc_endpoint()) .build() diff --git a/engine/packages/pegboard/src/metrics.rs b/engine/packages/pegboard/src/metrics.rs index b529b8a0ac..30324e7d31 100644 --- a/engine/packages/pegboard/src/metrics.rs +++ b/engine/packages/pegboard/src/metrics.rs @@ -1,44 +1,14 @@ use rivet_metrics::{ - BUCKETS, MICRO_BUCKETS, + MICRO_BUCKETS, otel::{global::*, metrics::*}, }; lazy_static::lazy_static! { static ref METER: Meter = meter("rivet-pegboard"); - /// Expected attributes: "client_id", "index" - pub static ref CLIENT_DUPLICATE_EVENT: Counter = METER.u64_counter("rivet_pegboard_client_duplicate_event") - .with_description("Duplicate client event that was attempted to be inserted.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_CPU_TOTAL: Gauge = METER.f64_gauge("rivet_pegboard_client_cpu_total") - .with_description("Total millicores of cpu available on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_MEMORY_TOTAL: Gauge = METER.f64_gauge("rivet_pegboard_client_memory_total") - .with_description("Total MiB of memory available on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_CPU_ALLOCATED: Gauge = METER.f64_gauge("rivet_pegboard_client_cpu_allocated") - .with_description("Total millicores of cpu allocated on a client.") - .build(); - - /// Expected attributes: "client_id", "flavor", "state" - pub static ref CLIENT_MEMORY_ALLOCATED: Gauge = METER.f64_gauge("rivet_pegboard_client_memory_allocated") - .with_description("Total MiB of memory allocated on a client.") - .build(); - - /// Has no expected attributes - pub static ref ACTOR_CPU_PENDING_ALLOCATION: Gauge = 
METER.f64_gauge("rivet_pegboard_actor_cpu_pending_allocation") - .with_description("Total actor cpu waiting for availability.") - .build(); - - /// Has no expected attributes - pub static ref ACTOR_MEMORY_PENDING_ALLOCATION: Gauge = METER.f64_gauge("rivet_pegboard_actor_memory_pending_allocation") - .with_description("Total actor memory waiting for availability.") + /// Expected attributes: "namespace_id", "runner_name" + pub static ref ACTOR_PENDING_ALLOCATION: Gauge = METER.f64_gauge("rivet_pegboard_actor_pending_allocation") + .with_description("Total actors waiting for availability.") .build(); /// Expected attributes: "did_reserve" @@ -46,20 +16,4 @@ lazy_static::lazy_static! { .with_description("Total duration to reserve resources for an actor.") .with_boundaries(MICRO_BUCKETS.to_vec()) .build(); - - /// Has no expected attributes - pub static ref ACTOR_START_DURATION: Histogram = METER.f64_histogram("rivet_pegboard_actor_start_duration") - .with_description("Total duration from actor creation to starting state.") - .with_boundaries(BUCKETS.to_vec()) - .build(); - - /// Expected attributes: "env_id", "flavor" - pub static ref ENV_CPU_USAGE: Gauge = METER.f64_gauge("rivet_pegboard_env_cpu_usage") - .with_description("Total millicores used by an environment.") - .build(); - - /// Expected attributes: "env_id", "flavor" - pub static ref ENV_MEMORY_USAGE: Gauge = METER.f64_gauge("rivet_pegboard_env_memory_usage") - .with_description("Total MiB of memory used by an environment.") - .build(); } diff --git a/engine/packages/pegboard/src/workflows/runner.rs b/engine/packages/pegboard/src/workflows/runner.rs index 43c1c876e6..c25c11a3d1 100644 --- a/engine/packages/pegboard/src/workflows/runner.rs +++ b/engine/packages/pegboard/src/workflows/runner.rs @@ -1,6 +1,7 @@ use futures_util::{FutureExt, StreamExt, TryStreamExt}; use gas::prelude::*; use rivet_data::converted::{ActorNameKeyData, MetadataKeyData, RunnerByKeyKeyData}; +use rivet_metrics::KeyValue; use 
rivet_runner_protocol::{self as protocol, PROTOCOL_VERSION, versioned}; use universaldb::{ options::{ConflictRangeType, StreamingMode}, @@ -9,7 +10,7 @@ use universaldb::{ use universalpubsub::PublishOpts; use vbare::OwnedVersionedData; -use crate::{keys, workflows::actor::Allocate}; +use crate::{keys, metrics, workflows::actor::Allocate}; /// How long after last ping before considering a runner ineligible for allocation. pub const RUNNER_ELIGIBLE_THRESHOLD_MS: i64 = util::duration::seconds(10); @@ -989,11 +990,11 @@ pub(crate) async fn allocate_pending_actors( input: &AllocatePendingActorsInput, ) -> Result { // NOTE: This txn should closely resemble the one found in the allocate_actor activity of the actor wf - let res = ctx + let (allocations, pending_actor_count) = ctx .udb()? .run(|tx| async move { let tx = tx.with_subspace(keys::subspace()); - let mut results = Vec::new(); + let mut allocations = Vec::new(); let pending_actor_subspace = keys::subspace().subspace( &keys::ns::PendingActorByRunnerNameSelectorKey::subspace( @@ -1010,6 +1011,7 @@ pub(crate) async fn allocate_pending_actors( // the one we choose Snapshot, ); + let mut pending_actor_count = 0; let ping_threshold_ts = util::timestamp::now() - RUNNER_ELIGIBLE_THRESHOLD_MS; 'queue_loop: loop { @@ -1017,6 +1019,8 @@ pub(crate) async fn allocate_pending_actors( break; }; + pending_actor_count += 1; + let (queue_key, generation) = tx.read_entry::(&queue_entry)?; @@ -1115,23 +1119,33 @@ pub(crate) async fn allocate_pending_actors( generation, )?; - results.push(ActorAllocation { + allocations.push(ActorAllocation { actor_id: queue_key.actor_id, signal: Allocate { runner_id: old_runner_alloc_key.runner_id, runner_workflow_id: old_runner_alloc_key_data.workflow_id, }, }); + + pending_actor_count -= 1; continue 'queue_loop; } } - Ok(results) + Ok((allocations, pending_actor_count)) }) .custom_instrument(tracing::info_span!("runner_allocate_pending_actors_tx")) .await?; - Ok(AllocatePendingActorsOutput { 
allocations: res }) + metrics::ACTOR_PENDING_ALLOCATION.record( + pending_actor_count as f64, + &[ + KeyValue::new("namespace_id", input.namespace_id.to_string()), + KeyValue::new("runner_name", input.name.to_string()), + ], + ); + + Ok(AllocatePendingActorsOutput { allocations }) } #[derive(Debug, Serialize, Deserialize, Hash)]