Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "extract" subcommand to mbtiles tool #1725

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ mbtiles = { path = "./mbtiles", version = "0.12.0" }
md5 = "0.7.0"
moka = { version = "0.12", features = ["future"] }
num_cpus = "1"
object_store = { version = "0.11", features = ["aws", "azure", "gcp"] }
pbf_font_tools = { version = "2.5.1", features = ["freetype"] }
pmtiles = { version = "0.11", features = ["http-async", "mmap-async-tokio", "tilejson", "reqwest-rustls-tls-native-roots"] }
png = "0.17.14"
Expand All @@ -84,6 +85,7 @@ sqlite-compressions = { version = "0.2.16", default-features = false, features =
sqlite-hashes = { version = "0.7.9", default-features = false, features = ["md5", "aggregate", "hex"] }
sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio"] }
static-files = "0.2"
strfmt = "0.2"
subst = { version = "0.3", features = ["yaml"] }
testcontainers-modules = { version = "0.11.6", features = ["postgres"] }
thiserror = "2"
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion mbtiles/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ rust-version.workspace = true

[features]
default = ["cli"]
cli = ["dep:anyhow", "dep:clap", "dep:env_logger", "dep:serde_yaml"]
cli = ["dep:anyhow", "dep:clap", "dep:env_logger", "dep:serde_yaml", "dep:regex"]

[dependencies]
enum-display.workspace = true
Expand All @@ -25,23 +25,27 @@ log.workspace = true
martin-tile-utils.workspace = true
md5.workspace = true
num_cpus.workspace = true
object_store.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
size_format.workspace = true
sqlite-compressions.workspace = true
sqlite-hashes.workspace = true
sqlx.workspace = true
strfmt.workspace = true
thiserror.workspace = true
tilejson.workspace = true
tokio = { workspace = true, features = ["rt-multi-thread"] }
url.workspace = true
xxhash-rust.workspace = true

# Bin dependencies
anyhow = { workspace = true, optional = true }
clap = { workspace = true, optional = true }
env_logger = { workspace = true, optional = true }
serde_yaml = { workspace = true, optional = true }
regex = { workspace = true, optional = true, features = [] }

[dev-dependencies]
# For testing, might as well use the same async framework as the Martin itself
Expand Down
170 changes: 168 additions & 2 deletions mbtiles/src/bin/mbtiles.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use std::ffi::OsStr;
use std::path::{Path, PathBuf};

use clap::{Parser, Subcommand};
use clap::builder::{StringValueParser, TypedValueParser};
use clap::error::{ContextKind, ContextValue};
use clap::{Arg, Command, Parser, Subcommand};
use log::error;
use mbtiles::{
apply_patch, AggHashType, CopyDuplicateMode, CopyType, IntegrityCheckType, MbtResult,
apply_patch, extract, AggHashType, CopyDuplicateMode, CopyType, IntegrityCheckType, MbtResult,
MbtTypeCli, Mbtiles, MbtilesCopier, PatchTypeCli, UpdateZoomType,
};
use regex::Regex;
use tilejson::Bounds;

#[derive(Parser, PartialEq, Debug)]
Expand Down Expand Up @@ -94,6 +98,9 @@ enum Commands {
#[arg(long, value_enum)]
agg_hash: Option<AggHashType>,
},
/// Extract tiles into the filesystem or an object store
#[command(name = "extract")]
Extract(ExtractArgs),
}

#[allow(clippy::doc_markdown)]
Expand Down Expand Up @@ -205,6 +212,117 @@ impl SharedCopyOpts {
}
}

/// Clap value parser that turns a single `<key>=<value>` command-line argument
/// into a `(key, value)` tuple of owned strings.
///
/// Whitespace around the key and directly after the `=` is ignored; the key
/// itself must be non-empty and may not contain whitespace or `=`. The value
/// may be empty and may itself contain `=` characters.
#[derive(Clone)]
pub struct KeyValueParser {
    /// Converts the raw `OsStr` argument into a `String` before matching.
    inner: StringValueParser,
    /// Pattern matching the *entire* argument as `<key>=<value>`; compiled
    /// once when the parser is constructed.
    regex_key_value: Regex,
}

impl Default for KeyValueParser {
    /// Builds a parser with the key-value pattern pre-compiled.
    fn default() -> Self {
        Self {
            inner: StringValueParser::new(),
            // The pattern is anchored at both ends so that stray leading text
            // (e.g. `foo bar=baz`) is rejected instead of being silently
            // dropped. `(?s)` lets `.` match newlines so a multi-line value is
            // kept whole rather than truncated at the first line break.
            regex_key_value: Regex::new(r"(?s)^\s*([^\s=]+)\s*=\s*(.*)$")
                .expect("key-value regex"),
        }
    }
}

impl TypedValueParser for KeyValueParser {
    type Value = (String, String);

    /// Parses `value` as `<key>=<value>`.
    ///
    /// Returns the `(key, value)` pair on success. If the argument does not
    /// match the pattern, a `ValueValidation` error is returned that carries
    /// the offending argument name (when known), the rejected value, and a
    /// hint describing the expected form. Errors from the inner string parser
    /// are propagated unchanged.
    fn parse_ref(
        &self,
        cmd: &Command,
        arg: Option<&Arg>,
        value: &OsStr,
    ) -> Result<Self::Value, clap::Error> {
        let value = self.inner.parse_ref(cmd, arg, value)?;
        if let Some(captures) = self.regex_key_value.captures(&value) {
            // Group 1 is the key, group 2 the (possibly empty) value.
            let (_, [key, value]) = captures.extract();
            Ok((key.to_string(), value.to_string()))
        } else {
            let mut err = clap::Error::new(clap::error::ErrorKind::ValueValidation);
            if let Some(arg) = arg {
                err.insert(
                    ContextKind::InvalidArg,
                    ContextValue::String(arg.to_string()),
                );
            }
            err.insert(ContextKind::InvalidValue, ContextValue::String(value));
            err.insert(
                ContextKind::Usage,
                ContextValue::String(
                    "value must be valid key-value pair in the form <key>=<value>".to_string(),
                ),
            );
            Err(err)
        }
    }
}

// Arguments of the `extract` subcommand.
//
// NOTE: the `///` comments in this struct are picked up by clap's derive macro
// (see `verbatim_doc_comment` below), so editing them changes the CLI help
// output. Field declaration order also matters: `file` and `object_store_url`
// carry no `short`/`long` and are therefore taken positionally, in this order.
/// Extract tiles from an MBTiles file to an object store or filesystem.
#[allow(clippy::doc_markdown)]
#[derive(Clone, Default, PartialEq, Debug, clap::Args)]
pub struct ExtractArgs {
/// MBTiles file to extract from.
file: PathBuf,

/// The object store URL to extract to.
///
/// Supported schemes are:
/// - file:///path
/// - s3://bucket/path for Amazon S3
/// - az://container/path or abfs://container@account_name.dfs.core.windows.net/path for Microsoft Azure
/// - gs://bucket/path for Google Cloud Storage
///
/// The path-fragment supports placeholders which will be replaced with the tiles x, y,
/// and z coordinates. Examples:
///
/// - file:///my/directory/{z}/{x}/{y}.pbf
/// - s3://my-bucket/tiles/{z}/{x}/{y}.png
///
/// Not using these placeholders will result in the same file being overwritten over and over
/// again.
///
/// Some of the supported object stores require additional configuration like access keys, endpoints, ...
/// using the `-o` option and/or environment variables. Supported environment variables are documented here:
///
/// - Amazon S3: https://docs.rs/object_store/0.11.1/object_store/aws/struct.AmazonS3Builder.html#method.from_env
/// - Microsoft Azure: https://docs.rs/object_store/0.11.1/object_store/azure/struct.MicrosoftAzureBuilder.html#method.from_env
/// - Google Cloud Storage: https://docs.rs/object_store/0.11.1/object_store/gcp/struct.GoogleCloudStorageBuilder.html#method.from_env
///
/// Additionally, there are aliases for the schemes as documented in the object_store crate:
/// https://docs.rs/object_store/0.11.1/object_store/enum.ObjectStoreScheme.html#supported-formats
///
/// In case the object store supports metadata like the content-type and content-encoding, these will be set
/// according to the tile format and encoding specified in the metadata in the MBTiles file. S3, Azure, and GCP
/// support this metadata.
// Kept as a plain `String` here; it is handed to `extract` by reference.
#[arg(verbatim_doc_comment)]
object_store_url: String,

/// Options to pass to the object store. These are key-value pairs in the form `key=value`.
///
/// The supported options are documented here:
///
/// - Amazon S3: https://docs.rs/object_store/0.11.1/object_store/aws/enum.AmazonS3ConfigKey.html
/// - Microsoft Azure: https://docs.rs/object_store/0.11.1/object_store/azure/enum.AzureConfigKey.html
/// - Google Cloud Storage: https://docs.rs/object_store/0.11.1/object_store/gcp/enum.GoogleConfigKey.html
///
/// Example options to access the S3-compatible MinIO server running on localhost:
///
/// -o endpoint=http://localhost:9000 -o access_key_id=*** -o secret_access_key=*** -o aws_allow_http=1
// Each `-o` occurrence is parsed by `KeyValueParser` into one `(key, value)`
// tuple; repeated occurrences accumulate in the vector. Only a short flag is
// declared (no `long`).
#[arg(short = 'o', value_parser = KeyValueParser::default(), verbatim_doc_comment)]
object_store_options: Vec<(String, String)>,

/// The number of concurrent tasks to use when extracting tiles.
// NOTE(review): `u8` admits 0, and the derived `Default` for this struct also
// yields 0 rather than 8 — confirm `extract` copes with a concurrency of 0, or
// restrict the accepted range here.
#[arg(short = 'c', default_value = "8")]
concurrency: u8,

/// Decode/decompress the tile data before writing to the object store.
// NOTE(review): `default_value = "false"` on a `bool` flag is presumably
// redundant with clap's default flag handling — confirm before removing.
#[arg(long, default_value = "false")]
decode: bool,
}

#[tokio::main]
async fn main() {
let env = env_logger::Env::default().default_filter_or("mbtiles=info");
Expand Down Expand Up @@ -290,6 +408,16 @@ async fn main_int() -> anyhow::Result<()> {
println!("MBTiles file summary for {mbt}");
println!("{}", mbt.summary(&mut conn).await?);
}
Commands::Extract(args) => {
extract(
args.file,
&args.object_store_url,
args.object_store_options,
args.concurrency,
args.decode,
)
.await?;
}
}

Ok(())
Expand Down Expand Up @@ -643,4 +771,42 @@ mod tests {
}
);
}

/// A plain `key=value` argument is split into its key and its value.
#[test]
fn test_parse_keyvalue() {
    let cmd = Command::new("test");
    let (key, value) = KeyValueParser::default()
        .parse_ref(&cmd, None, OsStr::new("key=value"))
        .unwrap();
    assert_eq!(key, "key");
    assert_eq!(value, "value");
}

/// Whitespace surrounding the `=` separator is not part of the key or value.
#[test]
fn test_parse_keyvalue_whitespace() {
    let cmd = Command::new("test");
    let raw = OsStr::new("key = value");
    let parsed = KeyValueParser::default().parse_ref(&cmd, None, raw);
    let expected = (String::from("key"), String::from("value"));
    assert_eq!(parsed.unwrap(), expected);
}

/// An argument without an `=` separator is rejected with a
/// `ValueValidation` error.
#[test]
fn test_parse_keyvalue_fail() {
    let cmd = Command::new("test");
    let raw = OsStr::new("missing equals sign");
    let failure = KeyValueParser::default()
        .parse_ref(&cmd, None, raw)
        .unwrap_err();
    assert_eq!(failure.kind(), ErrorKind::ValueValidation);
}
}
21 changes: 21 additions & 0 deletions mbtiles/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,27 @@ pub enum MbtError {

#[error(transparent)]
IoError(#[from] std::io::Error),

#[error(transparent)]
ObjectStoreError(#[from] object_store::Error),

#[error(transparent)]
ObjectStorePathError(#[from] object_store::path::Error),

#[error("Unable to parse object store URL")]
ObjectStoreParseError,

#[error(transparent)]
UrlParseError(#[from] url::ParseError),

#[error("Unsupported object store scheme")]
UnsupportedObjectStoreScheme,

#[error("Object store key format error")]
ObjectStoreKeyFormatError,

#[error("Unsupported encoding for decompression")]
UnsupportedEncodingForDecompression,
}

pub type MbtResult<T> = Result<T, MbtError>;
Loading
Loading