From 3960a7ab152c8e8bfe19a466a0ddef5ad7d51ee7 Mon Sep 17 00:00:00 2001 From: Smriti Agrawal Date: Tue, 25 Nov 2025 14:41:42 +0530 Subject: [PATCH] Whitelisting Onelake API & Workspace PL FQDNs --- src/azure/builder.rs | 83 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/src/azure/builder.rs b/src/azure/builder.rs index e824217f..6d5bfafa 100644 --- a/src/azure/builder.rs +++ b/src/azure/builder.rs @@ -25,6 +25,7 @@ use crate::client::{HttpConnector, TokenCredentialProvider, http_connector}; use crate::config::ConfigValue; use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider}; use percent_encoding::percent_decode_str; +use regex::Regex; use serde::{Deserialize, Serialize}; use std::str::FromStr; use std::sync::Arc; @@ -657,6 +658,9 @@ impl MicrosoftAzureBuilder { false => Ok(s.to_string()), }; + const DFS_FABRIC_SUFFIX: &str = "dfs.fabric.microsoft.com"; + const BLOB_FABRIC_SUFFIX: &str = "blob.fabric.microsoft.com"; + match parsed.scheme() { "adl" | "azure" => self.container_name = Some(validate(host)?), "az" | "abfs" | "abfss" => { @@ -675,32 +679,79 @@ impl MicrosoftAzureBuilder { return Err(Error::UrlNotRecognised { url: url.into() }.into()); } } - "https" => match host.split_once('.') { - Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => { - self.account_name = Some(validate(a)?); - let container = parsed.path_segments().unwrap().next().expect( + "https" => { + const DFS_FABRIC_SUFFIX: &str = "dfs.fabric.microsoft.com"; + const BLOB_FABRIC_SUFFIX: &str = "blob.fabric.microsoft.com"; + const DFS_AZURE_SUFFIX: &str = "dfs.core.windows.net"; + const BLOB_AZURE_SUFFIX: &str = "blob.core.windows.net"; + + // Regex to match WS-PL FQDN: "{workspaceid}.z??.dfs.fabric.microsoft.com" + // workspaceid = 32 hex chars, z?? = z + first two chars of workspaceid + lazy_static::lazy_static! 
{ + static ref WS_PL_REGEX: Regex = Regex::new(r"^(?P<workspaceid>[0-9a-f]{32})\.z(?P<xy>[0-9a-f]{2})\.(dfs|blob)\.fabric\.microsoft\.com$").unwrap(); + } + + if let Some(captures) = WS_PL_REGEX.captures(host) { + let workspaceid = captures.name("workspaceid").unwrap().as_str(); + let xy = captures.name("xy").unwrap().as_str(); + + // Validate z?? matches first 2 chars of workspaceid + if &workspaceid[0..2] != xy { + return Err(Error::UrlNotRecognised { url: url.into() }.into()); + } + + self.account_name = Some(validate(workspaceid)?); + self.use_fabric_endpoint = true; + + let container = parsed + .path_segments() + .and_then(|mut s| s.next()) + .unwrap_or(""); + if !container.is_empty() { + self.container_name = Some(validate(container)?); + } + + return Ok(()); + } + + // Otherwise, check Fabric global / Onelake API FQDN + if host.ends_with(DFS_FABRIC_SUFFIX) || host.ends_with(BLOB_FABRIC_SUFFIX) { + let labels: Vec<&str> = host.split('.').collect(); + let account_name = if labels.len() >= 2 && labels[0].contains("api") && labels[1] == "onelake" { + format!("{}-{}", labels[0], labels[1]) + } else { + labels[0].to_string() + }; + + self.account_name = Some(validate(&account_name)?); + self.use_fabric_endpoint = true; + + let container = parsed.path_segments().unwrap().next().expect( "iterator always contains at least one string (which may be empty)", ); if !container.is_empty() { self.container_name = Some(validate(container)?); } + + return Ok(()); } - Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => { - self.account_name = Some(validate(a)?); - // Attempt to infer the container name from the URL - // - https://onelake.dfs.fabric.microsoft.com///Files/test.csv - // - https://onelake.dfs.fabric.microsoft.com//.// - // - // See - let workspace = parsed.path_segments().unwrap().next().expect( + + // Azure Storage public + if host.ends_with(DFS_AZURE_SUFFIX) || host.ends_with(BLOB_AZURE_SUFFIX) { + let first_label = 
host.split('.').next().unwrap_or_default(); + self.account_name = Some(validate(first_label)?); + + let container = parsed.path_segments().unwrap().next().expect( "iterator always contains at least one string (which may be empty)", ); - if !workspace.is_empty() { - self.container_name = Some(workspace.to_string()) + if !container.is_empty() { + self.container_name = Some(validate(container)?); } - self.use_fabric_endpoint = true.into(); + + return Ok(()); } - _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), + + return Err(Error::UrlNotRecognised { url: url.into() }.into()); }, scheme => { let scheme = scheme.into();