Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
e145ef3
Implement databricks creds manager
m-abulazm Oct 28, 2025
6c75f6c
[WIP] use `DatabricksSecretProvider` in reconcile datasources
m-abulazm Oct 29, 2025
8b7fb0b
Merge branch 'main' into refactor/creds-manager
m-abulazm Oct 29, 2025
3f81eb4
update oracle data source after merge
m-abulazm Oct 29, 2025
4f037a5
Merge branch 'main' into refactor/creds-manager
m-abulazm Nov 7, 2025
374b63f
add load_credentials interface for data sources and impl for snowflak…
m-abulazm Nov 10, 2025
9a1286b
Merge branch 'main' into refactor/creds-manager
m-abulazm Nov 10, 2025
5102b3b
implement for tsql and oracle
m-abulazm Nov 10, 2025
e07e443
improve snowflake load credentials
m-abulazm Nov 10, 2025
fb6692d
add check if user loaded credentials in snowflake and use local creds…
m-abulazm Nov 10, 2025
a16455d
revert changes to reconcile before moving to a new branch
m-abulazm Nov 21, 2025
a62b6e0
Merge branch 'main' into refactor/creds-manager
m-abulazm Nov 21, 2025
79b0721
revert two more changes
m-abulazm Nov 21, 2025
c284f50
implement `test_databricks_credentials`
m-abulazm Nov 21, 2025
287c654
do not patch in specs and do not accept str on the factory method
m-abulazm Nov 21, 2025
53422f4
add test to increase coverage
m-abulazm Nov 24, 2025
88b5c14
remove unused addition
m-abulazm Nov 24, 2025
4957aff
Bump reconcile config to v2 and only use creds with a v1_migrate hepe…
m-abulazm Nov 24, 2025
be3f4bf
fix test fixtures
m-abulazm Nov 24, 2025
9b1dd10
one more fix
m-abulazm Nov 24, 2025
4335856
Merge branch 'main' into refactor/creds-manager
m-abulazm Nov 25, 2025
7264dda
Merge branch 'main' into refactor/creds-manager
gueniai Nov 26, 2025
6ceb7aa
Merge branch 'main' into refactor/creds-manager
gueniai Nov 26, 2025
75611fb
Merge branch 'main' into refactor/creds-manager
m-abulazm Nov 27, 2025
76b044e
change hardcoded vault_type value (it is only used in a log line so far)
m-abulazm Nov 28, 2025
c0b1080
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 3, 2025
87ff1a6
Apply suggestions from code review
m-abulazm Dec 3, 2025
6d8230c
fmt after applying suggestions
m-abulazm Dec 3, 2025
e31a720
fix test
m-abulazm Dec 3, 2025
3ef81cc
do not lazy init and remove unused method
m-abulazm Dec 3, 2025
178fe0f
add validation for vault_type
m-abulazm Dec 3, 2025
0f3e3ff
type test_credential_manager.py
m-abulazm Dec 3, 2025
2f66882
add tests
m-abulazm Dec 4, 2025
fef02c5
remove one unnecessary indirection
m-abulazm Dec 4, 2025
db3c605
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 8, 2025
e87edde
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 8, 2025
235bd1a
show user value error on decode errors
m-abulazm Dec 8, 2025
4a90673
add factory method
m-abulazm Dec 8, 2025
4870029
allow databricks vaults only
m-abulazm Dec 8, 2025
22ad1c1
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 15, 2025
3507cca
rename class and property
m-abulazm Dec 15, 2025
fd793ff
add utility method
m-abulazm Dec 15, 2025
f1fed74
add TODO
m-abulazm Dec 17, 2025
77d3741
add unit test after introducing fixtures to remove duplicate code war…
m-abulazm Dec 17, 2025
ed20038
add missing spec parameter needed for fixture
m-abulazm Dec 17, 2025
63a7104
fmt
m-abulazm Dec 18, 2025
3c6f6f8
Use configurable credentials in reconcile in place of db secret scope…
m-abulazm Dec 22, 2025
45e4c63
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 22, 2025
2723dc2
[WIP] introduce credentials.py module and migrate secret scope to creds
m-abulazm Dec 22, 2025
234185f
[WIP] handle snowflake auth
m-abulazm Dec 22, 2025
4ae37c5
fix snowflake auth handling
m-abulazm Dec 22, 2025
611aef1
make credentials optional and create test fixtures correctly for gree…
m-abulazm Dec 22, 2025
ac95e52
fix serializing dataclass in production and tests
m-abulazm Dec 24, 2025
204af6b
add more tests
m-abulazm Dec 24, 2025
864c7fa
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 24, 2025
9f6d58c
introduce oracle specific fixture for better readability
m-abulazm Dec 24, 2025
fe90e43
add test if both auth methods are there
m-abulazm Dec 25, 2025
cf4fe77
Merge branch 'main' into refactor/creds-manager
m-abulazm Dec 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@
from databricks.labs.lakebridge.connections.credential_manager import (
cred_file as creds,
CredentialManager,
create_credential_manager,
)
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
from databricks.labs.lakebridge.connections.env_getter import EnvGetter
from databricks.labs.lakebridge.assessments import CONNECTOR_REQUIRED

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -44,8 +42,8 @@ def __init__(
def _configure_credentials(self) -> str:
pass

@staticmethod
def _test_connection(source: str, cred_manager: CredentialManager):
def _test_connection(self, source: str):
cred_manager = CredentialManager.from_file(self._credential_file)
config = cred_manager.get_credentials(source)

try:
Expand All @@ -67,9 +65,7 @@ def run(self):
logger.info(f"{source.capitalize()} details and credentials received.")
if CONNECTOR_REQUIRED.get(self._source_name, True):
if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
cred_manager = create_credential_manager("lakebridge", EnvGetter())
if cred_manager:
self._test_connection(source, cred_manager)
self._test_connection(source)
logger.info(f"{source.capitalize()} Assessment Configuration Completed")


Expand Down
5 changes: 2 additions & 3 deletions src/databricks/labs/lakebridge/assessments/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from databricks.labs.lakebridge.assessments.profiler_config import PipelineConfig
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
from databricks.labs.lakebridge.connections.credential_manager import (
create_credential_manager,
CredentialManager,
)
from databricks.labs.lakebridge.connections.env_getter import EnvGetter
from databricks.labs.lakebridge.assessments import (
PRODUCT_NAME,
PRODUCT_PATH_PREFIX,
Expand Down Expand Up @@ -62,7 +61,7 @@ def profile(
def _setup_extractor(platform: str) -> DatabaseManager | None:
    """Build a DatabaseManager for *platform*, or None when no connector is required.

    Credentials are read from the product's standard credential file via
    CredentialManager.from_product_name.
    """
    if not CONNECTOR_REQUIRED[platform]:
        return None
    manager = CredentialManager.from_product_name(PRODUCT_NAME)
    return DatabaseManager(platform, manager.get_credentials(platform))

Expand Down
19 changes: 16 additions & 3 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from dataclasses import dataclass, asdict
from enum import Enum, auto
from pathlib import Path
from typing import Any, Literal, TypeVar, cast

from databricks.labs.blueprint.installation import JsonValue
from databricks.labs.blueprint.tui import Prompts

from databricks.labs.lakebridge.reconcile.connectors.credentials import build_recon_creds, ReconcileCredentialsConfig
from databricks.labs.lakebridge.transpiler.transpile_status import TranspileError
from databricks.labs.lakebridge.reconcile.recon_config import Table

Expand Down Expand Up @@ -254,13 +256,24 @@ class ReconcileMetadataConfig:
@dataclass
class ReconcileConfig:
__file__ = "reconcile.yml"
__version__ = 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For version bumps we really need a specific unit test to cover the version upgrades. (It's unfortunately a very error-prone path, and bugs in blueprint don't help.)

Although I can see some tests that include upgrades as part of their fixture, we really need to narrowly target just the migration of this config. (Let me know if you need any help setting this up.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is implicitly tested in some of the tests. If someone removes those tests, we would see it in the coverage.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm aware it's implicitly tested, but we still need tests that specifically target the migration: as I mentioned, it's a very error-prone path that has historically caused us a lot of trouble.

Copy link
Contributor Author

@m-abulazm m-abulazm Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__version__ = 2

data_source: str
report_type: str
secret_scope: str
database_config: DatabaseConfig
metadata_config: ReconcileMetadataConfig
creds: ReconcileCredentialsConfig | None = None
# databricks does not require creds

@classmethod
def v1_migrate(cls, raw: dict[str, JsonValue]) -> dict[str, JsonValue]:
    """Migrate a v1 reconcile config mapping to v2, in place.

    Replaces the legacy top-level 'secret_scope' entry with a 'creds'
    mapping derived from it, and stamps the mapping with version 2.

    NOTE(review): ``raw.pop("secret_scope")`` raises KeyError when the key is
    absent — this assumes every v1 config carries a secret scope; confirm
    against real installed configs before relying on it.
    """
    secret_scope = str(raw.pop("secret_scope"))
    data_source = str(raw["data_source"])
    # build_recon_creds returns None for databricks sources, which need no creds.
    maybe_creds = build_recon_creds(data_source, secret_scope)
    if maybe_creds:
        raw["creds"] = asdict(maybe_creds)
    raw["version"] = 2
    return raw


@dataclass
Expand Down
93 changes: 76 additions & 17 deletions src/databricks/labs/lakebridge/connections/credential_manager.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from pathlib import Path
import logging
from typing import Protocol
import base64

import yaml

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound

from databricks.labs.lakebridge.connections.env_getter import EnvGetter


Expand Down Expand Up @@ -32,18 +36,69 @@ def get_secret(self, key: str) -> str:
return key


class DatabricksSecretProvider(SecretProvider):
    """Resolve secrets stored in Databricks secret scopes via a workspace client."""

    def __init__(self, ws: WorkspaceClient):
        self._ws = ws

    def get_secret(self, key: str) -> str:
        """Get the secret value given a secret scope & secret key.

        Args:
            key: key in the format 'scope/secret'
        Returns:
            The secret value.

        Raises:
            ValueError: The secret key must be in the format 'scope/secret',
                or the stored bytes are not valid UTF-8.
            KeyError: The secret could not be found.
        """
        parts = key.split("/")
        if len(parts) != 2:
            # Reject anything that is not exactly one 'scope/secret' pair.
            raise ValueError(f"Secret key must be in the format 'scope/secret': Got {key}")
        scope, key_only = parts

        try:
            secret = self._ws.secrets.get_secret(scope, key_only)
            assert secret.value is not None
            # The API returns the payload Base64-encoded; decode to text.
            return base64.b64decode(secret.value).decode("utf-8")
        except NotFound as e:
            # TODO do not raise KeyError and standardize across all secret providers. Caller should handle missing secrets.
            raise KeyError(f'Secret does not exist with scope: {scope} and key: {key_only}') from e
        except UnicodeDecodeError as e:
            msg = f"Secret {key} has Base64 bytes that cannot be decoded to UTF-8 string"
            raise ValueError(msg) from e


class CredentialManager:
    """Look up per-source credentials, delegating secret resolution to a vault provider."""

    def __init__(self, credentials: dict, secret_providers: dict[str, SecretProvider]):
        # 'secret_vault_type' selects the provider; default to local (plain values).
        self._credentials = credentials
        self._default_vault = credentials.get('secret_vault_type', 'local').lower()
        self._provider = secret_providers.get(self._default_vault)
        if not self._provider:
            raise ValueError(f"Unsupported secret vault type: {self._default_vault}")

    @classmethod
    def from_product_name(cls, product_name: str, ws: WorkspaceClient | None = None) -> "CredentialManager":
        """Load credentials from the product's standard credential file location."""
        return cls.from_file(cred_file(product_name), ws)

    @classmethod
    def from_file(cls, path: Path, ws: WorkspaceClient | None = None) -> "CredentialManager":
        """Load credentials from the YAML file at *path*."""
        return cls.from_credentials(_load_credentials(path), ws)

    @classmethod
    def from_credentials(cls, credentials: dict, ws: WorkspaceClient | None = None) -> "CredentialManager":
        # NOTE(review): credentials is a flat string-to-string mapping (not
        # arbitrary nested JSON) — see PR discussion on typing this parameter.
        providers: dict[str, SecretProvider] = {
            'local': LocalSecretProvider(),
            'env': EnvSecretProvider(EnvGetter()),
        }
        # The databricks vault is only available when a workspace client is supplied.
        if ws:
            providers['databricks'] = DatabricksSecretProvider(ws)
        return cls(credentials, providers)

def get_credentials(self, source: str) -> dict:
if source not in self._credentials:
Expand All @@ -60,6 +115,23 @@ def _get_secret_value(self, key: str) -> str:
return self._provider.get_secret(key)


def build_credentials(vault_type: str, source: str, credentials: dict) -> dict:
    """Build credentials dictionary with secret vault type included.

    Args:
        vault_type: The type of secret vault (e.g., 'local', 'databricks').
        source: The source system name.
        credentials: The original credentials dictionary.

    Returns:
        A new credentials dictionary including the secret vault type.
    """
    wrapped: dict = {source: credentials}
    wrapped['secret_vault_type'] = vault_type.lower()
    return wrapped


def _get_home() -> Path:
return Path(__file__).home()

Expand All @@ -74,16 +146,3 @@ def _load_credentials(path: Path) -> dict:
return yaml.safe_load(f)
except FileNotFoundError as e:
raise FileNotFoundError(f"Credentials file not found at {path}") from e


def create_credential_manager(product_name: str, env_getter: EnvGetter) -> CredentialManager:
creds_path = cred_file(product_name)
creds = _load_credentials(creds_path)

secret_providers = {
'local': LocalSecretProvider(),
'env': EnvSecretProvider(env_getter),
'databricks': DatabricksSecretProvider(),
}

return CredentialManager(creds, secret_providers)
9 changes: 5 additions & 4 deletions src/databricks/labs/lakebridge/deployment/recon.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ def uninstall(self, recon_config: ReconcileConfig | None):
f"Won't remove reconcile metadata schema `{recon_config.metadata_config.schema}` "
f"from catalog `{recon_config.metadata_config.catalog}`. Please remove it and the tables inside manually."
)
logging.info(
f"Won't remove configured reconcile secret scope `{recon_config.secret_scope}`. "
f"Please remove it manually."
)
if recon_config.creds:
logging.info(
f"Won't remove configured reconcile credentials from `{recon_config.creds.vault_type}`. "
f"Please remove it manually."
)

def _deploy_tables(self, recon_config: ReconcileConfig):
logger.info("Deploying reconciliation metadata tables.")
Expand Down
6 changes: 4 additions & 2 deletions src/databricks/labs/lakebridge/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
from databricks.labs.lakebridge.reconcile.connectors.credentials import build_recon_creds
from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
from databricks.labs.lakebridge.transpiler.installers import (
BladebridgeInstaller,
Expand Down Expand Up @@ -325,18 +326,19 @@ def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
report_type = self._prompts.choice(
"Select the report type:", [report_type.value for report_type in ReconReportType]
)
scope_name = self._prompts.question(
scope_name = self._prompts.question( # TODO deprecate
f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
default=f"remorph_{data_source}",
)
creds = build_recon_creds(data_source, scope_name)

db_config = self._prompt_for_reconcile_database_config(data_source)
metadata_config = self._prompt_for_reconcile_metadata_config()

return ReconcileConfig(
data_source=data_source,
report_type=report_type,
secret_scope=scope_name,
creds=creds,
database_config=db_config,
metadata_config=metadata_config,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import logging
from dataclasses import dataclass

from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.connections.credential_manager import build_credentials, CredentialManager

logger = logging.getLogger(__name__)


@dataclass
class ReconcileCredentialsConfig:
    """Where reconcile source credentials live: a vault type plus secret names."""

    vault_type: str  # only 'databricks' vaults are currently accepted
    vault_secret_names: dict[str, str]  # credential key -> 'scope/secret' reference

    def __post_init__(self):
        # Fail fast so unsupported vault configurations surface at load time.
        if self.vault_type != "databricks":
            raise ValueError(f"Unsupported vault_type: {self.vault_type}")


# Baseline connection keys shared by all JDBC-style sources.
_REQUIRED_JDBC_CREDS = [
    "host",
    "port",
    "database",
    "user",
    "password",
]

# T-SQL family (mssql / synapse) additionally needs TLS-related settings.
_TSQL_REQUIRED_CREDS = [*_REQUIRED_JDBC_CREDS, "encrypt", "trustServerCertificate"]

# Oracle uses the plain JDBC baseline (copied so the lists stay independent).
_ORACLE_REQUIRED_CREDS = [*_REQUIRED_JDBC_CREDS]

# Snowflake uses its connector-specific sf* option names.
_SNOWFLAKE_REQUIRED_CREDS = [
    "sfUser",
    "sfUrl",
    "sfDatabase",
    "sfSchema",
    "sfWarehouse",
    "sfRole",
    # sfPassword is not required here; auth is validated separately
]

# Source system -> required credential keys. An empty list means the source
# needs no stored credentials (databricks uses the ambient workspace auth).
_SOURCE_CREDENTIALS_MAP = {
    "databricks": [],
    "snowflake": _SNOWFLAKE_REQUIRED_CREDS,
    "oracle": _ORACLE_REQUIRED_CREDS,
    "mssql": _TSQL_REQUIRED_CREDS,
    "synapse": _TSQL_REQUIRED_CREDS,
}


def build_recon_creds(source: str, secret_scope: str) -> ReconcileCredentialsConfig | None:
    """Derive a Databricks-vault credentials config from a legacy secret scope.

    Maps every credential key required by *source* to a 'scope/key' secret
    reference. Returns None for 'databricks' sources, which need no stored
    credentials.

    Raises:
        ValueError: *source* is not a known source system.
    """
    if source == "databricks":
        return None

    required = _SOURCE_CREDENTIALS_MAP.get(source)
    if not required:
        raise ValueError(f"Unsupported source system: {source}")

    secret_names = {name: f"{secret_scope}/{name}" for name in required}

    if source == "snowflake":
        # Both auth secrets are mapped; the user must choose one explicitly.
        logger.warning("Please specify the Snowflake authentication method in the credentials config.")
        secret_names["pem_private_key"] = f"{secret_scope}/pem_private_key"
        secret_names["sfPassword"] = f"{secret_scope}/sfPassword"

    return ReconcileCredentialsConfig("databricks", secret_names)


def validate_creds(creds: ReconcileCredentialsConfig, source: str) -> None:
    """Ensure *creds* names a secret for every credential *source* requires.

    Args:
        creds: The configured credential references to check.
        source: The source system name to validate against.

    Raises:
        ValueError: *source* is not a known source system, or one or more
            required credential keys have no secret name configured.
    """
    required_keys = _SOURCE_CREDENTIALS_MAP.get(source)
    if required_keys is None:
        # 'is None' distinguishes an unknown source from a source whose
        # requirement list is legitimately empty ('databricks': [] in the map);
        # the previous truthiness check wrongly rejected the latter as
        # unsupported.
        raise ValueError(f"Unsupported source system: {source}")

    missing = [k for k in required_keys if not creds.vault_secret_names.get(k)]
    if missing:
        raise ValueError(
            f"Missing mandatory {source} credentials. " f"Please configure all of {required_keys}. Missing: {missing}"
        )


def load_and_validate_credentials(
    creds: ReconcileCredentialsConfig,
    ws: WorkspaceClient,
    source: str,
) -> dict[str, str]:
    """Validate *creds* for *source* and resolve every secret via the workspace.

    Raises whatever validate_creds raises for invalid configurations; otherwise
    returns the fully resolved credential mapping for *source*.
    """
    validate_creds(creds, source)

    manager_input = build_credentials(creds.vault_type, source, creds.vault_secret_names)
    manager = CredentialManager.from_credentials(manager_input, ws)
    return manager.get_credentials(source)
Loading
Loading