Skip to content

Commit c7f5a11

Browse files
authored
[reconfigurator] Retrieve keeper lgif information (#6549)
## Overview This commit introduces a new `clickhouse-admin` API endpoint: `/keeper/lgif`. This endpoint uses the ClickHouse CLI internally to retrieve and parse the logically grouped information file from the ClickHouse keepers. ## Purpose Reconfigurator will need this information to reliably manage and operate a ClickHouse replicated cluster. Additional endpoints to retrieve other information from ClickHouse servers or keepers will be added in follow up PRs. ## Testing In addition to the unit tests, I have manually tested with the following results: ```console $ cargo run --bin=clickhouse-admin -- run -c ./smf/clickhouse-admin/config.toml -a [::1]:8888 -l [::1]:20001 -b /Users/karcar/src/omicron/out/clickhouse/clickhouse Compiling omicron-clickhouse-admin v0.1.0 (/Users/karcar/src/omicron/clickhouse-admin) Finished `dev` profile [unoptimized + debuginfo] target(s) in 2.46s Running `target/debug/clickhouse-admin run -c ./smf/clickhouse-admin/config.toml -a '[::1]:8888' -l '[::1]:20001' -b /Users/karcar/src/omicron/out/clickhouse/clickhouse` note: configured to log to "/dev/stdout" {"msg":"listening","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-12T02:37:19.383597Z","hostname":"ixchel","pid":3115,"local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:205"} {"msg":"accepted connection","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-12T02:37:23.843325Z","hostname":"ixchel","pid":3115,"local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:775","remote_addr":"[::1]:54455"} {"msg":"request 
completed","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-12T02:37:24.302588Z","hostname":"ixchel","pid":3115,"uri":"/keeper/lgif","method":"GET","req_id":"64b232d0-d6ac-4cae-8f0a-f14cf6d1dfba","remote_addr":"[::1]:54455","local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:914","latency_us":458301,"response_code":"200"} ``` ```console $ curl http://[::1]:8888/keeper/lgif {"first_log_idx":1,"first_log_term":1,"last_log_idx":11717,"last_log_term":20,"last_committed_log_idx":11717,"leader_committed_log_idx":11717,"target_committed_log_idx":11717,"last_snapshot_idx":9465} ``` Related: #5999
1 parent bac635f commit c7f5a11

File tree

17 files changed

+725
-95
lines changed

17 files changed

+725
-95
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clickhouse-admin/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,13 @@ toml.workspace = true
3030
omicron-workspace-hack.workspace = true
3131

3232
[dev-dependencies]
33+
clickward.workspace = true
34+
dropshot.workspace = true
3335
expectorate.workspace = true
3436
nexus-test-utils.workspace = true
3537
omicron-test-utils.workspace = true
38+
oximeter-db.workspace = true
39+
oximeter-test-utils.workspace = true
3640
openapi-lint.workspace = true
3741
openapiv3.workspace = true
3842
serde_json.workspace = true

clickhouse-admin/api/src/lib.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

55
use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
6-
use clickhouse_admin_types::{KeeperSettings, ServerSettings};
7-
use dropshot::{HttpError, HttpResponseCreated, RequestContext, TypedBody};
6+
use clickhouse_admin_types::{KeeperSettings, Lgif, ServerSettings};
7+
use dropshot::{
8+
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
9+
};
810
use omicron_common::api::external::Generation;
911
use schemars::JsonSchema;
1012
use serde::Deserialize;
@@ -50,4 +52,15 @@ pub trait ClickhouseAdminApi {
5052
rqctx: RequestContext<Self::Context>,
5153
body: TypedBody<KeeperConfigurableSettings>,
5254
) -> Result<HttpResponseCreated<KeeperConfig>, HttpError>;
55+
56+
/// Retrieve a logically grouped information file from a keeper node.
57+
/// This information is used internally by ZooKeeper to manage snapshots
58+
/// and logs for consistency and recovery.
59+
#[endpoint {
60+
method = GET,
61+
path = "/keeper/lgif",
62+
}]
63+
async fn lgif(
64+
rqctx: RequestContext<Self::Context>,
65+
) -> Result<HttpResponseOk<Lgif>, HttpError>;
5366
}

clickhouse-admin/src/bin/clickhouse-admin.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
use anyhow::anyhow;
88
use camino::Utf8PathBuf;
99
use clap::Parser;
10-
use omicron_clickhouse_admin::{Clickward, Config};
10+
use omicron_clickhouse_admin::{ClickhouseCli, Clickward, Config};
1111
use omicron_common::cmd::fatal;
1212
use omicron_common::cmd::CmdError;
1313
use std::net::{SocketAddr, SocketAddrV6};
@@ -27,6 +27,14 @@ enum Args {
2727
/// Path to the server configuration file
2828
#[clap(long, short, action)]
2929
config: Utf8PathBuf,
30+
31+
/// Address on which the clickhouse server or keeper is listening
32+
#[clap(long, short = 'l', action)]
33+
listen_address: SocketAddrV6,
34+
35+
/// Path to the clickhouse binary
36+
#[clap(long, short, action)]
37+
binary_path: Utf8PathBuf,
3038
},
3139
}
3240

@@ -41,17 +49,21 @@ async fn main_impl() -> Result<(), CmdError> {
4149
let args = Args::parse();
4250

4351
match args {
44-
Args::Run { http_address, config } => {
52+
Args::Run { http_address, config, listen_address, binary_path } => {
4553
let mut config = Config::from_file(&config)
4654
.map_err(|err| CmdError::Failure(anyhow!(err)))?;
4755
config.dropshot.bind_address = SocketAddr::V6(http_address);
48-
4956
let clickward = Clickward::new();
57+
let clickhouse_cli =
58+
ClickhouseCli::new(binary_path, listen_address);
5059

51-
let server =
52-
omicron_clickhouse_admin::start_server(clickward, config)
53-
.await
54-
.map_err(|err| CmdError::Failure(anyhow!(err)))?;
60+
let server = omicron_clickhouse_admin::start_server(
61+
clickward,
62+
clickhouse_cli,
63+
config,
64+
)
65+
.await
66+
.map_err(|err| CmdError::Failure(anyhow!(err)))?;
5567
server.await.map_err(|err| {
5668
CmdError::Failure(anyhow!(
5769
"server failed after starting: {err}"
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
use anyhow::Result;
6+
use camino::Utf8PathBuf;
7+
use clickhouse_admin_types::Lgif;
8+
use dropshot::HttpError;
9+
use illumos_utils::{output_to_exec_error, ExecutionError};
10+
use slog::Logger;
11+
use slog_error_chain::{InlineErrorChain, SlogInlineError};
12+
use std::ffi::OsStr;
13+
use std::io;
14+
use std::net::SocketAddrV6;
15+
use tokio::process::Command;
16+
17+
#[derive(Debug, thiserror::Error, SlogInlineError)]
18+
pub enum ClickhouseCliError {
19+
#[error("failed to run `clickhouse {subcommand}`")]
20+
Run {
21+
description: &'static str,
22+
subcommand: String,
23+
#[source]
24+
err: io::Error,
25+
},
26+
#[error(transparent)]
27+
ExecutionError(#[from] ExecutionError),
28+
#[error("failed to parse command output")]
29+
Parse {
30+
description: &'static str,
31+
stdout: String,
32+
stderr: String,
33+
#[source]
34+
err: anyhow::Error,
35+
},
36+
}
37+
38+
impl From<ClickhouseCliError> for HttpError {
39+
fn from(err: ClickhouseCliError) -> Self {
40+
match err {
41+
ClickhouseCliError::Run { .. }
42+
| ClickhouseCliError::Parse { .. }
43+
| ClickhouseCliError::ExecutionError(_) => {
44+
let message = InlineErrorChain::new(&err).to_string();
45+
HttpError {
46+
status_code: http::StatusCode::INTERNAL_SERVER_ERROR,
47+
error_code: Some(String::from("Internal")),
48+
external_message: message.clone(),
49+
internal_message: message,
50+
}
51+
}
52+
}
53+
}
54+
}
55+
56+
#[derive(Debug)]
57+
pub struct ClickhouseCli {
58+
/// Path to where the clickhouse binary is located
59+
pub binary_path: Utf8PathBuf,
60+
/// Address on where the clickhouse keeper is listening on
61+
pub listen_address: SocketAddrV6,
62+
pub log: Option<Logger>,
63+
}
64+
65+
impl ClickhouseCli {
66+
pub fn new(binary_path: Utf8PathBuf, listen_address: SocketAddrV6) -> Self {
67+
Self { binary_path, listen_address, log: None }
68+
}
69+
70+
pub fn with_log(mut self, log: Logger) -> Self {
71+
self.log = Some(log);
72+
self
73+
}
74+
75+
pub async fn lgif(&self) -> Result<Lgif, ClickhouseCliError> {
76+
self.keeper_client_non_interactive(
77+
"lgif",
78+
"Retrieve logically grouped information file",
79+
Lgif::parse,
80+
self.log.clone().unwrap(),
81+
)
82+
.await
83+
}
84+
85+
async fn keeper_client_non_interactive<F, T>(
86+
&self,
87+
query: &str,
88+
subcommand_description: &'static str,
89+
parse: F,
90+
log: Logger,
91+
) -> Result<T, ClickhouseCliError>
92+
where
93+
F: FnOnce(&Logger, &[u8]) -> Result<T>,
94+
{
95+
let mut command = Command::new(&self.binary_path);
96+
command
97+
.arg("keeper-client")
98+
.arg("--host")
99+
.arg(&format!("[{}]", self.listen_address.ip()))
100+
.arg("--port")
101+
.arg(&format!("{}", self.listen_address.port()))
102+
.arg("--query")
103+
.arg(query);
104+
105+
let output = command.output().await.map_err(|err| {
106+
let err_args: Vec<&OsStr> = command.as_std().get_args().collect();
107+
let err_args_parsed: Vec<String> = err_args
108+
.iter()
109+
.map(|&os_str| os_str.to_string_lossy().into_owned())
110+
.collect();
111+
let err_args_str = err_args_parsed.join(" ");
112+
ClickhouseCliError::Run {
113+
description: subcommand_description,
114+
subcommand: err_args_str,
115+
err,
116+
}
117+
})?;
118+
119+
if !output.status.success() {
120+
return Err(output_to_exec_error(command.as_std(), &output).into());
121+
}
122+
123+
parse(&log, &output.stdout).map_err(|err| ClickhouseCliError::Parse {
124+
description: subcommand_description,
125+
stdout: String::from_utf8_lossy(&output.stdout).to_string(),
126+
stderr: String::from_utf8_lossy(&output.stdout).to_string(),
127+
err,
128+
})
129+
}
130+
}

clickhouse-admin/src/context.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,29 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5-
use crate::Clickward;
5+
use crate::{ClickhouseCli, Clickward};
66
use slog::Logger;
77

88
pub struct ServerContext {
99
clickward: Clickward,
10+
clickhouse_cli: ClickhouseCli,
1011
_log: Logger,
1112
}
1213

1314
impl ServerContext {
14-
pub fn new(clickward: Clickward, _log: Logger) -> Self {
15-
Self { clickward, _log }
15+
pub fn new(
16+
clickward: Clickward,
17+
clickhouse_cli: ClickhouseCli,
18+
_log: Logger,
19+
) -> Self {
20+
Self { clickward, clickhouse_cli, _log }
1621
}
1722

1823
pub fn clickward(&self) -> &Clickward {
1924
&self.clickward
2025
}
26+
27+
pub fn clickhouse_cli(&self) -> &ClickhouseCli {
28+
&self.clickhouse_cli
29+
}
2130
}

clickhouse-admin/src/http_entrypoints.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
use crate::context::ServerContext;
66
use clickhouse_admin_api::*;
77
use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
8-
use dropshot::{HttpError, HttpResponseCreated, RequestContext, TypedBody};
8+
use clickhouse_admin_types::Lgif;
9+
use dropshot::{
10+
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
11+
};
912
use std::sync::Arc;
1013

1114
type ClickhouseApiDescription = dropshot::ApiDescription<Arc<ServerContext>>;
@@ -44,4 +47,12 @@ impl ClickhouseAdminApi for ClickhouseAdminImpl {
4447
let output = ctx.clickward().generate_keeper_config(keeper.settings)?;
4548
Ok(HttpResponseCreated(output))
4649
}
50+
51+
async fn lgif(
52+
rqctx: RequestContext<Self::Context>,
53+
) -> Result<HttpResponseOk<Lgif>, HttpError> {
54+
let ctx = rqctx.context();
55+
let output = ctx.clickhouse_cli().lgif().await?;
56+
Ok(HttpResponseOk(output))
57+
}
4758
}

clickhouse-admin/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ use std::error::Error;
1111
use std::io;
1212
use std::sync::Arc;
1313

14+
mod clickhouse_cli;
1415
mod clickward;
1516
mod config;
1617
mod context;
1718
mod http_entrypoints;
1819

20+
pub use clickhouse_cli::ClickhouseCli;
1921
pub use clickward::Clickward;
2022
pub use config::Config;
2123

@@ -34,6 +36,7 @@ pub type Server = dropshot::HttpServer<Arc<ServerContext>>;
3436
/// Start the dropshot server
3537
pub async fn start_server(
3638
clickward: Clickward,
39+
clickhouse_cli: ClickhouseCli,
3740
server_config: Config,
3841
) -> Result<Server, StartError> {
3942
let (drain, registration) = slog_dtrace::with_drain(
@@ -56,6 +59,8 @@ pub async fn start_server(
5659

5760
let context = ServerContext::new(
5861
clickward,
62+
clickhouse_cli
63+
.with_log(log.new(slog::o!("component" => "ClickhouseCli"))),
5964
log.new(slog::o!("component" => "ServerContext")),
6065
);
6166
let http_server_starter = dropshot::HttpServerStarter::new(

0 commit comments

Comments
 (0)