diff --git a/docs/lakebridge/docs/reconcile/index.mdx b/docs/lakebridge/docs/reconcile/index.mdx
index 8a111d5162..a1ee8f8bd8 100644
--- a/docs/lakebridge/docs/reconcile/index.mdx
+++ b/docs/lakebridge/docs/reconcile/index.mdx
@@ -38,36 +38,38 @@ Refer to [Reconcile Configuration Guide](reconcile_configuration) for detailed i
 
 > 2. Setup the connection properties
 
-Lakebridge-Reconcile manages connection properties by utilizing secrets stored in the Databricks workspace.
-Below is the default secret naming convention for managing connection properties.
-
-**Note: When both the source and target are Databricks, a secret scope is not required.**
-
-**Default Secret Scope:** lakebridge_data_source
-
-| source        | scope                 |
-|---------------|-----------------------|
-| snowflake     | lakebridge_snowflake  |
-| oracle        | lakebridge_oracle     |
-| databricks    | lakebridge_databricks |
-| mssql         | lakebridge_mssql      |
-| synapse       | lakebridge_synapse    |
-
-Below are the connection properties required for each source:
-
+Reconcile connection properties are configured through a mapping from connection property to value.
+Values can be used directly, loaded from environment variables, or fetched from Databricks secrets, depending on the `creds` section of `reconcile.yml`:
+```yaml
+...
+creds:
+  vault_type: local
+  vault_secret_names:
+    some_property = [value]
+```
+To use Databricks secrets instead, set `vault_type: databricks`; each value must take the form `[secret_scope]/[secret_key]`:
+```yaml
+...
+creds:
+  vault_type: databricks
+  vault_secret_names:
+    some_property = [secret_scope]/[secret_key]
+    ...
+```
+The expected connection properties under `vault_secret_names` per data source are:
 ```yaml
-  sfUrl = https://[acount_name].snowflakecomputing.com
-  account = [acount_name]
-  sfUser = [user]
-  sfPassword = [password]
-  sfDatabase = [database]
-  sfSchema = [schema]
-  sfWarehouse = [warehouse_name]
-  sfRole = [role_name]
-  pem_private_key = [pkcs8_pem_private_key]
-  pem_private_key_password = [pkcs8_pem_private_key]
+  sfUrl = [local_or_databricks_mapping]
+  account = [local_or_databricks_mapping]
+  sfUser = [local_or_databricks_mapping]
+  sfPassword = [local_or_databricks_mapping]
+  sfDatabase = [local_or_databricks_mapping]
+  sfSchema = [local_or_databricks_mapping]
+  sfWarehouse = [local_or_databricks_mapping]
+  sfRole = [local_or_databricks_mapping]
+  pem_private_key = [local_or_databricks_mapping]
+  pem_private_key_password = [local_or_databricks_mapping]
 ```
 
 :::note
@@ -81,22 +83,22 @@ Below are the connection properties required for each source:
 
 ```yaml
-  user = [user]
-  password = [password]
-  host = [host]
-  port = [port]
-  database = [database/SID]
+  user = [local_or_databricks_mapping]
+  password = [local_or_databricks_mapping]
+  host = [local_or_databricks_mapping]
+  port = [local_or_databricks_mapping]
+  database = [local_or_databricks_mapping]
 ```
 
 ```yaml
-  user = [user]
-  password = [password]
-  host = [host]
-  port = [port]
-  database = [database/SID]
-  encrypt = [true/false]
-  trustServerCertificate = [true/false]
+  user = [local_or_databricks_mapping]
+  password = [local_or_databricks_mapping]
+  host = [local_or_databricks_mapping]
+  port = [local_or_databricks_mapping]
+  database = [local_or_databricks_mapping]
+  encrypt = [local_or_databricks_mapping]
+  trustServerCertificate = [local_or_databricks_mapping]
 ```
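To make the resolution rules above concrete, here is a minimal Python sketch of how a single `vault_secret_names` entry could be resolved per vault type. `resolve_secret` is a hypothetical helper, not the Lakebridge API (the real `ReconcileCredentialsConfig` dataclass lives in `databricks.labs.lakebridge.reconcile.connectors.credentials`):

```python
import base64
import os

from databricks.sdk import WorkspaceClient


def resolve_secret(vault_type: str, mapped_value: str, ws: WorkspaceClient | None = None) -> str:
    """Resolve one `vault_secret_names` entry according to the configured vault type."""
    if vault_type == "local":
        # `local`: the mapped value is the credential itself.
        return mapped_value
    if vault_type == "env":
        # `env`: the mapped value names an environment variable to read.
        return os.environ[mapped_value]
    if vault_type == "databricks":
        # `databricks`: the mapped value is "[secret_scope]/[secret_key]".
        scope, key = mapped_value.split("/", 1)
        assert ws is not None, "a WorkspaceClient is required for Databricks secrets"
        # The SDK returns the secret payload base64-encoded.
        response = ws.secrets.get_secret(scope=scope, key=key)
        return base64.b64decode(response.value).decode("utf-8")
    raise ValueError(f"unknown vault type: {vault_type}")
```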
diff --git a/docs/lakebridge/docs/reconcile/recon_notebook.mdx b/docs/lakebridge/docs/reconcile/recon_notebook.mdx
index b2203bbd08..13526f4dc5 100644
--- a/docs/lakebridge/docs/reconcile/recon_notebook.mdx
+++ b/docs/lakebridge/docs/reconcile/recon_notebook.mdx
@@ -69,15 +69,14 @@ We use the class `ReconcileConfig` to configure the properties required for reco
 class ReconcileConfig:
     data_source: str
     report_type: str
-    secret_scope: str
     database_config: DatabaseConfig
     metadata_config: ReconcileMetadataConfig
+    creds: ReconcileCredentialsConfig | None = None
 ```
 
 Parameters:
 - `data_source`: The data source to be reconciled. Supported values: `snowflake`, `teradata`, `oracle`, `mssql`, `synapse`, `databricks`.
 - `report_type`: The type of report to be generated. Available report types are `schema`, `row`, `data` or `all`. For details check [here](./dataflow_example.mdx).
-- `secret_scope`: The secret scope name used to store the connection credentials for the source database system.
 - `database_config`: The database configuration for connecting to the source database. expects a `DatabaseConfig` object.
   - `source_schema`: The source schema name.
   - `target_catalog`: The target catalog name.
@@ -104,6 +103,15 @@ class ReconcileMetadataConfig:
 ```
 If not set the default values will be used to store the metadata. The default resources are created during the installation of Lakebridge.
+- `creds`: The credentials used to connect to the data source. Expects a `ReconcileCredentialsConfig` object:
+  - `vault_type`: `local` to use the mapped values directly, `env` to load them from environment variables, or `databricks` to load them from Databricks secrets.
+  - `vault_secret_names`: A mapping from reconcile credential keys to values, resolved according to the vault type.
+```python
+@dataclass
+class ReconcileCredentialsConfig:
+    vault_type: str
+    vault_secret_names: dict[str, str]
+```
 
 An Example of configuring the Reconcile properties:
 
@@ -111,13 +119,13 @@
 from databricks.labs.lakebridge.config import (
     DatabaseConfig,
     ReconcileConfig,
-    ReconcileMetadataConfig
+    ReconcileMetadataConfig,
+    ReconcileCredentialsConfig
 )
 
 reconcile_config = ReconcileConfig(
     data_source = "snowflake",
     report_type = "all",
-    secret_scope = "snowflake-credential",
     database_config= DatabaseConfig(source_catalog="source_sf_catalog",
                     source_schema="source_sf_schema",
                     target_catalog="target_databricks_catalog",
@@ -126,9 +134,14 @@ reconcile_config = ReconcileConfig(
     metadata_config = ReconcileMetadataConfig(
         catalog = "lakebridge_metadata",
         schema= "reconcile"
-    )
+    ),
+    creds=ReconcileCredentialsConfig(
+        vault_type="databricks",
+        vault_secret_names={"sfUrl": "some_secret_scope/some_key", "sfUser": "another_secret_scope/user_key", "sfPassword": "scope/key", "sfRole": "scope/key"}
+    )
 )
 ```
+All of the expected credentials for the chosen data source must be configured.
 
 ## Configure Table Properties
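Since the inline example above omits a few of the Snowflake keys, here is a fuller sketch of a `ReconcileCredentialsConfig` covering the whole password-based Snowflake property set from the table earlier. The `my_scope/...` entries are placeholder secret names, not defaults:

```python
from databricks.labs.lakebridge.config import ReconcileCredentialsConfig

# Every key the Snowflake connector expects, pointed at a Databricks secret
# in the form "[secret_scope]/[secret_key]". For key-pair auth, replace
# "sfPassword" with "pem_private_key" (and optionally "pem_private_key_password").
snowflake_creds = ReconcileCredentialsConfig(
    vault_type="databricks",
    vault_secret_names={
        "sfUrl": "my_scope/sfUrl",
        "account": "my_scope/account",
        "sfUser": "my_scope/sfUser",
        "sfPassword": "my_scope/sfPassword",
        "sfDatabase": "my_scope/sfDatabase",
        "sfSchema": "my_scope/sfSchema",
        "sfWarehouse": "my_scope/sfWarehouse",
        "sfRole": "my_scope/sfRole",
    },
)
```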
diff --git a/docs/lakebridge/docs/reconcile/reconcile_automation.mdx b/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
index 80fc668940..0d62825a1f 100644
--- a/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
+++ b/docs/lakebridge/docs/reconcile/reconcile_automation.mdx
@@ -116,7 +116,7 @@ To run the utility, the following parameters must be set:
 - `remorph_catalog`: The catalog configured through CLI.
 - `remorph_schema`: The schema configured through CLI.
 - `remorph_config_table`: The table configs created as a part of the pre-requisites.
-- `secret_scope`: The Databricks secret scope for accessing the source system. Refer to the Lakebridge documentation for the specific keys required to be configured as per the source system.
+- `secret_scope`: (Deprecated) The Databricks secret scope previously used for accessing the source system. Refer to the Lakebridge documentation for the specific keys required per source system.
 - `source_system`: The source system against which reconciliation is performed.
 - `table_recon_summary`: The target summary table created as a part of the pre-requisites.
 
diff --git a/docs/lakebridge/static/lakebridge_reconcile/lakebridge_recon_main.html b/docs/lakebridge/static/lakebridge_reconcile/lakebridge_recon_main.html
index 3f4ab49382..6ab8a6eeb0 100644
[Hunks garbled in extraction; only the page title "lakebridge_recon_main - Databricks" and rows of changed head tags survive.]
diff --git a/docs/lakebridge/static/lakebridge_reconciliation.dbc b/docs/lakebridge/static/lakebridge_reconciliation.dbc
index a320ebc2b3..5547db7688 100644
Binary files a/docs/lakebridge/static/lakebridge_reconciliation.dbc and b/docs/lakebridge/static/lakebridge_reconciliation.dbc differ
diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py
index bb76743b66..a042803dfe 100644
--- a/src/databricks/labs/lakebridge/cli.py
+++ b/src/databricks/labs/lakebridge/cli.py
@@ -27,7 +27,6 @@
 from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.connections.credential_manager import cred_file
-from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
 from databricks.labs.lakebridge.install import installer
 from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
@@ -699,18 +698,6 @@ def generate_lineage(
     lineage_generator(engine, source_dialect, input_source, output_folder)
 
 
-@lakebridge.command
-def configure_secrets(*, w: WorkspaceClient) -> None:
-    """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
-    recon_conf = ReconConfigPrompts(w)
-
-    # Prompt for source
-    source = recon_conf.prompt_source()
-
-    logger.info(f"Setting up Scope, Secrets for `{source}` reconciliation")
-    recon_conf.prompt_and_save_connection_details()
-
-
 @lakebridge.command
 def configure_database_profiler(w: WorkspaceClient) -> None:
     """[Experimental] Installs and runs the Lakebridge Assessment package for database profiling"""
diff --git a/src/databricks/labs/lakebridge/helpers/recon_config_utils.py b/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
index e798edbf77..90b5ccfe8b 100644
--- a/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
+++ b/src/databricks/labs/lakebridge/helpers/recon_config_utils.py
@@ -1,100 +1,20 @@
 import logging
 
 from databricks.labs.blueprint.tui import Prompts
+
+from databricks.labs.lakebridge.reconcile.connectors.credentials import ReconcileCredentialsConfig
 from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors.platform import ResourceDoesNotExist
 
 logger = logging.getLogger(__name__)
 
 
 class ReconConfigPrompts:
     def __init__(self, ws: WorkspaceClient, prompts: Prompts = Prompts()):
-        self._source = None
         self._prompts = prompts
         self._ws = ws
 
-    def _scope_exists(self, scope_name: str) -> bool:
-        scope_exists = scope_name in [scope.name for scope in self._ws.secrets.list_scopes()]
-
-        if not scope_exists:
-            logger.error(
-                f"Error: Cannot find Secret Scope: `{scope_name}` in Databricks Workspace."
- f"\nUse `remorph configure-secrets` to setup Scope and Secrets" - ) - return False - logger.debug(f"Found Scope: `{scope_name}` in Databricks Workspace") - return True - - def _ensure_scope_exists(self, scope_name: str): - """ - Get or Create a new Scope in Databricks Workspace - :param scope_name: - """ - scope_exists = self._scope_exists(scope_name) - if not scope_exists: - allow_scope_creation = self._prompts.confirm("Do you want to create a new one?") - if not allow_scope_creation: - msg = "Scope is needed to store Secrets in Databricks Workspace" - raise SystemExit(msg) - - try: - logger.debug(f" Creating a new Scope: `{scope_name}`") - self._ws.secrets.create_scope(scope_name) - except Exception as ex: - logger.error(f"Exception while creating Scope `{scope_name}`: {ex}") - raise ex - - logger.info(f" Created a new Scope: `{scope_name}`") - logger.info(f" Using Scope: `{scope_name}`...") - - def _secret_key_exists(self, scope_name: str, secret_key: str) -> bool: - try: - self._ws.secrets.get_secret(scope_name, secret_key) - logger.info(f"Found Secret key `{secret_key}` in Scope `{scope_name}`") - return True - except ResourceDoesNotExist: - logger.debug(f"Secret key `{secret_key}` not found in Scope `{scope_name}`") - return False - - def _store_secret(self, scope_name: str, secret_key: str, secret_value: str): - try: - logger.debug(f"Storing Secret: *{secret_key}* in Scope: `{scope_name}`") - self._ws.secrets.put_secret(scope=scope_name, key=secret_key, string_value=secret_value) - except Exception as ex: - logger.error(f"Exception while storing Secret `{secret_key}`: {ex}") - raise ex - - def store_connection_secrets(self, scope_name: str, conn_details: tuple[str, dict[str, str]]): - engine = conn_details[0] - secrets = conn_details[1] - - logger.debug(f"Storing `{engine}` Connection Secrets in Scope: `{scope_name}`") - - for key, value in secrets.items(): - secret_key = key - logger.debug(f"Processing Secret: *{secret_key}*") - debug_op = "Storing" - info_op = "Stored" - if self._secret_key_exists(scope_name, secret_key): - overwrite_secret = self._prompts.confirm(f"Do you want to overwrite `{secret_key}`?") - if not overwrite_secret: - continue - debug_op = "Overwriting" - info_op = "Overwritten" - - logger.debug(f"{debug_op} Secret: *{secret_key}* in Scope: `{scope_name}`") - self._store_secret(scope_name, secret_key, value) - logger.info(f"{info_op} Secret: *{secret_key}* in Scope: `{scope_name}`") - - def prompt_source(self): - source = self._prompts.choice( - "Select the source dialect", [source_type.value for source_type in ReconSourceType] - ) - self._source = source - return source - - def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]: + def _prompt_snowflake_connection_details(self) -> dict[str, str]: """ Prompt for Snowflake connection details :return: tuple[str, dict[str, str]] @@ -103,30 +23,40 @@ def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]: f"Please answer a couple of questions to configure `{ReconSourceType.SNOWFLAKE.value}` Connection profile" ) - sf_url = self._prompts.question("Enter Snowflake URL") - account = self._prompts.question("Enter Account Name") - sf_user = self._prompts.question("Enter User") - sf_password = self._prompts.question("Enter Password") - sf_db = self._prompts.question("Enter Database") - sf_schema = self._prompts.question("Enter Schema") - sf_warehouse = self._prompts.question("Enter Snowflake Warehouse") - sf_role = self._prompts.question("Enter Role", default=" ") + sf_url = 
self._prompts.question("Enter Snowflake URL Secret Name") + sf_user = self._prompts.question("Enter User Secret Name") + password_dict = {} + sf_password = self._prompts.question( + "Enter Password Secret Name or use `None` to use key-based auth", default="None" + ) + if sf_password.lower() == "none": + logger.info("Proceeding with PEM Private Key authentication...") + sf_pem_key = self._prompts.question("Enter PEM Private Key Secret Name") + password_dict["pem_private_key"] = sf_pem_key + sf_pem_key_password = self._prompts.question( + "Enter PEM Private Key Password Secret Name or use `None`", default="None" + ) + if sf_pem_key_password.lower() == "none": + password_dict["pem_private_key_password"] = sf_pem_key_password + else: + password_dict["sfPassword"] = sf_password + sf_db = self._prompts.question("Enter Database Secret Name") + sf_schema = self._prompts.question("Enter Schema Secret Name") + sf_warehouse = self._prompts.question("Enter Snowflake Warehouse Secret Name") + sf_role = self._prompts.question("Enter Role Secret Name") sf_conn_details = { "sfUrl": sf_url, - "account": account, "sfUser": sf_user, - "sfPassword": sf_password, "sfDatabase": sf_db, "sfSchema": sf_schema, "sfWarehouse": sf_warehouse, "sfRole": sf_role, - } + } | password_dict - sf_conn_dict = (ReconSourceType.SNOWFLAKE.value, sf_conn_details) - return sf_conn_dict + return sf_conn_details - def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]: + def _prompt_oracle_connection_details(self) -> dict[str, str]: """ Prompt for Oracle connection details :return: tuple[str, dict[str, str]] @@ -134,43 +64,58 @@ def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]: logger.info( f"Please answer a couple of questions to configure `{ReconSourceType.ORACLE.value}` Connection profile" ) - user = self._prompts.question("Enter User") - password = self._prompts.question("Enter Password") - host = self._prompts.question("Enter host") - port = self._prompts.question("Enter port") - database = self._prompts.question("Enter database/SID") + user = self._prompts.question("Enter User Secret Name") + password = self._prompts.question("Enter Password Secret Name") + host = self._prompts.question("Enter host Secret Name") + port = self._prompts.question("Enter port Secret Name") + database = self._prompts.question("Enter database/SID Secret Name") oracle_conn_details = {"user": user, "password": password, "host": host, "port": port, "database": database} - oracle_conn_dict = (ReconSourceType.ORACLE.value, oracle_conn_details) - return oracle_conn_dict + return oracle_conn_details - def _connection_details(self): + def _prompt_mssql_connection_details(self) -> dict[str, str]: """ - Prompt for connection details based on the source - :return: None + Prompt for Oracle connection details + :return: tuple[str, dict[str, str]] """ - logger.debug(f"Prompting for `{self._source}` connection details") - match self._source: + logger.info( + f"Please answer a couple of questions to configure `{ReconSourceType.MSSQL.value}`/`{ReconSourceType.SYNAPSE.value}` Connection profile" + ) + user = self._prompts.question("Enter User Secret Name") + password = self._prompts.question("Enter Password Secret Name") + host = self._prompts.question("Enter host Secret Name") + port = self._prompts.question("Enter port Secret Name") + database = self._prompts.question("Enter database Secret Name") + encrypt = self._prompts.question("Enter Encrypt Secret Name") + trust_server_certificate = self._prompts.question("Enter 
diff --git a/src/databricks/labs/lakebridge/install.py b/src/databricks/labs/lakebridge/install.py
index ea4458f893..cf5e73b5ce 100644
--- a/src/databricks/labs/lakebridge/install.py
+++ b/src/databricks/labs/lakebridge/install.py
@@ -24,7 +24,7 @@
 from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
 from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
-from databricks.labs.lakebridge.reconcile.connectors.credentials import build_recon_creds
+from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
 from databricks.labs.lakebridge.transpiler.installers import (
     BladebridgeInstaller,
@@ -48,6 +48,7 @@ def __init__(  # pylint: disable=too-many-arguments
         install_state: InstallState,
         product_info: ProductInfo,
         resource_configurator: ResourceConfigurator,
+        recon_creds_prompts: ReconConfigPrompts,
         workspace_installation: WorkspaceInstallation,
         environ: dict[str, str] | None = None,
         *,
@@ -65,6 +66,7 @@
         self._install_state = install_state
         self._product_info = product_info
         self._resource_configurator = resource_configurator
+        self._recon_creds_prompts = recon_creds_prompts
         self._ws_installation = workspace_installation
 
         # TODO: Refactor the 'prompts' property in preference to using this flag, which should be redundant.
self._is_interactive = is_interactive @@ -326,11 +328,10 @@ def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig: report_type = self._prompts.choice( "Select the report type:", [report_type.value for report_type in ReconReportType] ) - scope_name = self._prompts.question( # TODO deprecate - f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets", - default=f"remorph_{data_source}", - ) - creds = build_recon_creds(data_source, scope_name) + if data_source != ReconSourceType.DATABRICKS.value: + creds = self._recon_creds_prompts.prompt_recon_creds(data_source) + else: + creds = None db_config = self._prompt_for_reconcile_database_config(data_source) metadata_config = self._prompt_for_reconcile_metadata_config() @@ -416,6 +417,7 @@ def installer( app_context.install_state, app_context.product_info, app_context.resource_configurator, + ReconConfigPrompts(ws, app_context.prompts), app_context.workspace_installation, transpiler_repository=transpiler_repository, is_interactive=is_interactive, diff --git a/tests/integration/config/test_config.py b/tests/integration/config/test_config.py index afc61c1e5c..57965b2d5e 100644 --- a/tests/integration/config/test_config.py +++ b/tests/integration/config/test_config.py @@ -1,3 +1,5 @@ +from unittest.mock import MagicMock + from databricks.sdk import WorkspaceClient from databricks.labs.blueprint.tui import MockPrompts @@ -26,6 +28,7 @@ def test_stores_and_fetches_config(ws: WorkspaceClient) -> None: context.install_state, context.product_info, context.resource_configurator, + MagicMock(), context.workspace_installation, ) config = TranspileConfig( diff --git a/tests/integration/reconcile/query_builder/test_execute.py b/tests/integration/reconcile/query_builder/test_execute.py index 94edd5ec5c..522527f9e3 100644 --- a/tests/integration/reconcile/query_builder/test_execute.py +++ b/tests/integration/reconcile/query_builder/test_execute.py @@ -1145,7 +1145,7 @@ def mock_for_report_type_all( reconcile_config_all = ReconcileConfig( data_source="snowflake", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_catalog=CATALOG, source_schema=SCHEMA, @@ -1420,7 +1420,7 @@ def mock_for_report_type_row( reconcile_config_row = ReconcileConfig( data_source="snowflake", report_type="row", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_catalog=CATALOG, source_schema=SCHEMA, diff --git a/tests/integration/reconcile/test_oracle_reconcile.py b/tests/integration/reconcile/test_oracle_reconcile.py index 5203504619..1deedca12c 100644 --- a/tests/integration/reconcile/test_oracle_reconcile.py +++ b/tests/integration/reconcile/test_oracle_reconcile.py @@ -55,7 +55,7 @@ def test_oracle_db_reconcile(mock_spark, mock_workspace_client, tmp_path): reconcile_config = ReconcileConfig( data_source="oracle", report_type=report, - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=db_config, metadata_config=ReconcileMetadataConfig(catalog="tmp", schema="reconcile"), ) 
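For existing installs, migrating off the removed `secret_scope` field amounts to naming each expected key explicitly. A sketch, assuming your secrets live in the old default scope `remorph_oracle` (all scope, catalog, and schema names here are placeholders):

```python
from databricks.labs.lakebridge.config import (
    DatabaseConfig,
    ReconcileConfig,
    ReconcileCredentialsConfig,
    ReconcileMetadataConfig,
)

# Previously: ReconcileConfig(..., secret_scope="remorph_oracle", ...).
# Now each expected Oracle key points at an explicit "secret_scope/secret_key" entry.
migrated = ReconcileConfig(
    data_source="oracle",
    report_type="all",
    creds=ReconcileCredentialsConfig(
        vault_type="databricks",
        vault_secret_names={
            "user": "remorph_oracle/user",
            "password": "remorph_oracle/password",
            "host": "remorph_oracle/host",
            "port": "remorph_oracle/port",
            "database": "remorph_oracle/database",
        },
    ),
    database_config=DatabaseConfig(
        source_schema="tpch_sf1000",
        target_catalog="tpch",
        target_schema="1000gb",
    ),
    metadata_config=ReconcileMetadataConfig(catalog="lakebridge_metadata", schema="reconcile"),
)
```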
diff --git a/tests/unit/deployment/test_installation.py b/tests/unit/deployment/test_installation.py index 61647fac71..766905499d 100644 --- a/tests/unit/deployment/test_installation.py +++ b/tests/unit/deployment/test_installation.py @@ -56,7 +56,7 @@ def test_install_all(ws): reconcile_config = ReconcileConfig( data_source="oracle", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_schema="tpch_sf10006", target_catalog="tpch6", @@ -111,7 +111,7 @@ def test_recon_component_installation(ws): reconcile_config = ReconcileConfig( data_source="oracle", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_schema="tpch_sf10008", target_catalog="tpch8", @@ -194,7 +194,7 @@ def test_uninstall_configs_exist(ws): reconcile_config = ReconcileConfig( data_source="snowflake", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_catalog="snowflake_sample_data1", source_schema="tpch_sf10001", diff --git a/tests/unit/deployment/test_job.py b/tests/unit/deployment/test_job.py index 3e60a8058f..f24413e23c 100644 --- a/tests/unit/deployment/test_job.py +++ b/tests/unit/deployment/test_job.py @@ -23,7 +23,7 @@ def oracle_recon_config() -> ReconcileConfig: return ReconcileConfig( data_source="oracle", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_schema="tpch_sf10009", target_catalog="tpch9", @@ -42,7 +42,7 @@ def snowflake_recon_config() -> ReconcileConfig: return ReconcileConfig( data_source="snowflake", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_schema="tpch_sf10009", target_catalog="tpch9", diff --git a/tests/unit/deployment/test_recon.py b/tests/unit/deployment/test_recon.py index a53a82134b..23692d57c7 100644 --- a/tests/unit/deployment/test_recon.py +++ b/tests/unit/deployment/test_recon.py @@ -57,7 +57,7 @@ def test_install(ws): reconcile_config = ReconcileConfig( data_source="snowflake", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), database_config=DatabaseConfig( source_catalog="snowflake_sample_data4", source_schema="tpch_sf10004", @@ -150,7 +150,7 @@ def test_uninstall(ws): recon_config = ReconcileConfig( data_source="snowflake", report_type="all", - creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"__secret_scope": "fake"}), + creds=ReconcileCredentialsConfig(vault_type="databricks", vault_secret_names={"fake": "fake"}), 
database_config=DatabaseConfig( source_catalog="snowflake_sample_data5", source_schema="tpch_sf10005", diff --git a/tests/unit/helpers/test_recon_config_utils.py b/tests/unit/helpers/test_recon_config_utils.py index 84558295b3..8519f7aa09 100644 --- a/tests/unit/helpers/test_recon_config_utils.py +++ b/tests/unit/helpers/test_recon_config_utils.py @@ -1,145 +1,77 @@ -from unittest.mock import patch - -import pytest - from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts -from databricks.sdk.errors.platform import ResourceDoesNotExist -from databricks.sdk.service.workspace import SecretScope -SOURCE_DICT = {"databricks": "0", "mssql": "1", "oracle": "2", "snowflake": "3", "synapse": "4"} -SCOPE_NAME = "dummy_scope" +from databricks.labs.lakebridge.reconcile.constants import ReconSourceType -def test_configure_secrets_snowflake_overwrite(mock_workspace_client): +def test_configure_secrets_snowflake(mock_workspace_client): prompts = MockPrompts( { - r"Select the source": SOURCE_DICT["snowflake"], - r"Enter Secret Scope name": SCOPE_NAME, + r"Enter secret vault type": "0", r"Enter Snowflake URL": "dummy", - r"Enter Account Name": "dummy", r"Enter User": "dummy", - r"Enter Password": "dummy", + r"Enter Password*": "dummy", r"Enter Database": "dummy", r"Enter Schema": "dummy", r"Enter Snowflake Warehouse": "dummy", r"Enter Role": "dummy", - r"Do you want to overwrite.*": "yes", } ) - mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name=SCOPE_NAME)]] recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.prompt_source() - - recon_conf.prompt_and_save_connection_details() + recon_conf.prompt_recon_creds(ReconSourceType.SNOWFLAKE.value) -def test_configure_secrets_oracle_insert(mock_workspace_client): - # mock prompts for Oracle +def test_configure_secrets_snowflake_pem(mock_workspace_client): prompts = MockPrompts( { - r"Select the source": SOURCE_DICT["oracle"], - r"Enter Secret Scope name": SCOPE_NAME, - r"Do you want to create a new one?": "yes", + r"Enter secret vault type": "0", + r"Enter Snowflake URL": "dummy", r"Enter User": "dummy", - r"Enter Password": "dummy", - r"Enter host": "dummy", - r"Enter port": "dummy", - r"Enter database/SID": "dummy", - } - ) - - mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]] - - with patch( - "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._secret_key_exists", - return_value=False, - ): - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.prompt_source() - - recon_conf.prompt_and_save_connection_details() - - -def test_configure_secrets_invalid_source(mock_workspace_client): - prompts = MockPrompts( - { - r"Select the source": "100", # Invalid source - r"Enter Secret Scope name": SCOPE_NAME, - } - ) - - with patch( - "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._scope_exists", - return_value=True, - ): - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - with pytest.raises(ValueError, match="cannot get answer within 10 attempt"): - recon_conf.prompt_source() - - -def test_store_connection_secrets_exception(mock_workspace_client): - prompts = MockPrompts( - { - r"Do you want to overwrite `source_key`?": "no", - } - ) - - mock_workspace_client.secrets.get_secret.side_effect = ResourceDoesNotExist("Not Found") - mock_workspace_client.secrets.put_secret.side_effect = Exception("Timed out") - - 
recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - - with pytest.raises(Exception, match="Timed out"): - recon_conf.store_connection_secrets("scope_name", ("source", {"key": "value"})) - - -def test_configure_secrets_no_scope(mock_workspace_client): - prompts = MockPrompts( - { - r"Select the source": SOURCE_DICT["snowflake"], - r"Enter Secret Scope name": SCOPE_NAME, - r"Do you want to create a new one?": "no", + r"Enter Password*": "none", + r"Enter PEM*": "dummy", + r"Enter PEM*Password*": "none", + r"Enter Database": "dummy", + r"Enter Schema": "dummy", + r"Enter Snowflake Warehouse": "dummy", + r"Enter Role": "dummy", } ) - - mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]] - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.prompt_source() + recon_conf.prompt_recon_creds(ReconSourceType.SNOWFLAKE.value) - with pytest.raises(SystemExit, match="Scope is needed to store Secrets in Databricks Workspace"): - recon_conf.prompt_and_save_connection_details() - -def test_configure_secrets_create_scope_exception(mock_workspace_client): +def test_configure_secrets_oracle(mock_workspace_client): + # mock prompts for Oracle prompts = MockPrompts( { - r"Select the source": SOURCE_DICT["snowflake"], - r"Enter Secret Scope name": SCOPE_NAME, + r"Enter secret vault type": "1", r"Do you want to create a new one?": "yes", + r"Enter User": "dummy", + r"Enter Password": "dummy", + r"Enter host": "dummy", + r"Enter port": "dummy", + r"Enter database/SID": "dummy", } ) - mock_workspace_client.secrets.list_scopes.side_effect = [[SecretScope(name="scope_name")]] - mock_workspace_client.secrets.create_scope.side_effect = Exception("Network Error") - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.prompt_source() - - with pytest.raises(Exception, match="Network Error"): - recon_conf.prompt_and_save_connection_details() + recon_conf.prompt_recon_creds(ReconSourceType.ORACLE.value) -def test_store_connection_secrets_overwrite(mock_workspace_client): +def test_configure_secrets_tsql(mock_workspace_client): prompts = MockPrompts( { - r"Do you want to overwrite `key`?": "no", + r"Enter secret vault type": "2", + r"Enter User": "dummy", + r"Enter Password": "dummy", + r"Enter host": "dummy", + r"Enter port": "dummy", + r"Enter database": "dummy", + r"Enter Encrypt": "dummy", + r"Enter Trust Server Certificate": "dummy", } ) - with patch( - "databricks.labs.lakebridge.helpers.recon_config_utils.ReconConfigPrompts._secret_key_exists", return_value=True - ): - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.store_connection_secrets("scope_name", ("source", {"key": "value"})) + recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) + recon_conf.prompt_recon_creds(ReconSourceType.MSSQL.value) + recon_conf.prompt_recon_creds(ReconSourceType.SYNAPSE.value) diff --git a/tests/unit/test_cli_other.py b/tests/unit/test_cli_other.py index 5d184bffc0..c2a6568709 100644 --- a/tests/unit/test_cli_other.py +++ b/tests/unit/test_cli_other.py @@ -6,21 +6,6 @@ from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.lakebridge import cli from databricks.labs.lakebridge.config import LSPConfigOptionV1, LSPPromptMethod -from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts - - -def test_configure_secrets_databricks(mock_workspace_client): - source_dict = {"databricks": "0", "netezza": "1", "oracle": "2", "snowflake": "3"} - prompts = 
MockPrompts( - { - r"Select the source": source_dict["databricks"], - } - ) - - recon_conf = ReconConfigPrompts(mock_workspace_client, prompts) - recon_conf.prompt_source() - - recon_conf.prompt_and_save_connection_details() @pytest.mark.parametrize( @@ -60,12 +45,6 @@ def test_interactive_argument_auto(is_tty: bool) -> None: assert interactive_mode is is_tty -def test_cli_configure_secrets_config(mock_workspace_client): - with patch("databricks.labs.lakebridge.cli.ReconConfigPrompts") as mock_recon_config: - cli.configure_secrets(w=mock_workspace_client) - mock_recon_config.assert_called_once_with(mock_workspace_client) - - def test_cli_reconcile(mock_workspace_client): with patch("databricks.labs.lakebridge.reconcile.runner.ReconcileRunner.run", return_value=True): cli.reconcile(w=mock_workspace_client) diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index fd97261692..6015ec8f07 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -1,7 +1,7 @@ import logging from collections.abc import Callable, Generator, Sequence from pathlib import Path -from unittest.mock import create_autospec, patch +from unittest.mock import create_autospec, patch, MagicMock import pytest from databricks.labs.blueprint.installation import JsonObject, MockInstallation @@ -20,7 +20,9 @@ from databricks.labs.lakebridge.contexts.application import ApplicationContext from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation +from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts from databricks.labs.lakebridge.install import WorkspaceInstaller +from databricks.labs.lakebridge.reconcile.connectors.credentials import build_recon_creds from databricks.labs.lakebridge.reconcile.constants import ReconSourceType, ReconReportType from databricks.labs.lakebridge.transpiler.installers import ( TranspilerInstaller, @@ -90,6 +92,7 @@ def test_workspace_installer_run_raise_error_in_dbr(ws: WorkspaceClient) -> None ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, environ=environ, ) @@ -115,6 +118,7 @@ def test_workspace_installer_run_install_not_called_in_test( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -142,6 +146,7 @@ def test_workspace_installer_run_install_called_with_provided_config( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -164,6 +169,7 @@ def test_configure_error_if_invalid_module_selected(ws: WorkspaceClient) -> None ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -203,6 +209,7 @@ def test_workspace_installer_run_install_called_with_generated_config( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) workspace_installer.run("transpile") @@ -253,6 +260,7 @@ def test_configure_transpile_no_existing_installation( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -322,6 +330,7 @@ def test_configure_transpile_installation_no_override(ws: WorkspaceClient) -> No ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) remorph_config = workspace_installer.configure(module="transpile") @@ -383,6 +392,7 @@ def 
test_configure_transpile_installation_config_error_continue_install( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -446,6 +456,7 @@ def test_configure_transpile_installation_with_no_validation(ws, ws_installer): ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -517,6 +528,7 @@ def test_configure_transpile_installation_with_validation_and_warehouse_id_from_ ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -574,6 +586,7 @@ def test_configure_reconcile_installation_no_override(ws: WorkspaceClient, recon ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) with pytest.raises(SystemExit): @@ -583,6 +596,7 @@ def test_configure_reconcile_installation_no_override(ws: WorkspaceClient, recon @pytest.mark.parametrize("datasource", ["oracle"]) def test_configure_reconcile_installation_config_error_continue_install( datasource: str, + secret_scope: str, ws: WorkspaceClient, reconcile_config: ReconcileConfig, oracle_reconcile_config_v2_yml: dict, @@ -592,7 +606,6 @@ def test_configure_reconcile_installation_config_error_continue_install( { r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index(datasource)), r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")), - r"Enter Secret scope name to store .* connection details / secrets": f"remorph_{datasource}", r"Enter source database name for .*": "tpch_sf1000", r"Enter target catalog name for Databricks": "tpch", r"Enter target schema name for Databricks": "1000gb", @@ -621,6 +634,8 @@ def test_configure_reconcile_installation_config_error_continue_install( workspace_installation=create_autospec(WorkspaceInstallation), ) + creds_mock = MagicMock(ReconConfigPrompts) + creds_mock.prompt_recon_creds.return_value = build_recon_creds(datasource, secret_scope) workspace_installer = WorkspaceInstaller( ctx.workspace_client, ctx.prompts, @@ -628,6 +643,7 @@ def test_configure_reconcile_installation_config_error_continue_install( ctx.install_state, ctx.product_info, ctx.resource_configurator, + creds_mock, ctx.workspace_installation, ) config = workspace_installer.configure(module="reconcile") @@ -644,13 +660,17 @@ def test_configure_reconcile_installation_config_error_continue_install( @pytest.mark.parametrize("datasource", ["snowflake", "databricks"]) @patch("webbrowser.open") def test_configure_reconcile_no_existing_installation( - _, datasource: str, ws: WorkspaceClient, reconcile_config: ReconcileConfig, reconcile_config_v2_yml: dict + _, + datasource: str, + secret_scope: str, + ws: WorkspaceClient, + reconcile_config: ReconcileConfig, + reconcile_config_v2_yml: dict, ) -> None: prompts = MockPrompts( { r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index(datasource)), r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")), - r"Enter Secret scope name to store .* connection details / secrets": f"remorph_{datasource}", r"Enter source catalog name for .*": f"{datasource}_sample_data", r"Enter source schema name for .*": "tpch_sf1000", r"Enter target catalog name for Databricks": "tpch", @@ -672,6 +692,8 @@ def test_configure_reconcile_no_existing_installation( workspace_installation=create_autospec(WorkspaceInstallation), ) + creds_mock = MagicMock(ReconConfigPrompts) + creds_mock.prompt_recon_creds.return_value = build_recon_creds(datasource, secret_scope) workspace_installer = 
WorkspaceInstaller( ctx.workspace_client, ctx.prompts, @@ -679,6 +701,7 @@ def test_configure_reconcile_no_existing_installation( ctx.install_state, ctx.product_info, ctx.resource_configurator, + creds_mock, ctx.workspace_installation, ) config = workspace_installer.configure(module="reconcile") @@ -693,6 +716,8 @@ def test_configure_reconcile_no_existing_installation( @pytest.mark.parametrize("datasource", ["snowflake"]) def test_configure_all_override_installation( + datasource: str, + secret_scope: str, ws_installer: Callable[..., WorkspaceInstaller], ws: WorkspaceClient, reconcile_config: ReconcileConfig, @@ -711,8 +736,7 @@ def test_configure_all_override_installation( r"Open .* in the browser?": "no", r"Select the Data Source": str(RECONCILE_DATA_SOURCES.index("snowflake")), r"Select the report type": str(RECONCILE_REPORT_TYPES.index("all")), - r"Enter Secret scope name to store .* connection details / secrets": "remorph_snowflake", - r"Enter source catalog name for .*": "snowflake_sample_data", + r"Enter source catalog name for .*": f"{datasource}_sample_data", r"Enter source schema name for .*": "tpch_sf1000", r"Enter target catalog name for Databricks": "tpch", r"Enter target schema name for Databricks": "1000gb", @@ -750,6 +774,8 @@ def test_configure_all_override_installation( workspace_installation=create_autospec(WorkspaceInstallation), ) + creds_mock = MagicMock(ReconConfigPrompts) + creds_mock.prompt_recon_creds.return_value = build_recon_creds(datasource, secret_scope) workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, @@ -757,6 +783,7 @@ def test_configure_all_override_installation( ctx.install_state, ctx.product_info, ctx.resource_configurator, + creds_mock, ctx.workspace_installation, ) @@ -855,6 +882,7 @@ def test_runs_upgrades_on_more_recent_version( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, ) @@ -925,6 +953,7 @@ def transpilers_path(self) -> Path: ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=_TranspilerRepository(), ) @@ -1014,6 +1043,7 @@ def test_runs_and_stores_force_config_option( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=transpiler_repository, ) @@ -1096,6 +1126,7 @@ def test_runs_and_stores_question_config_option( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=transpiler_repository, ) @@ -1184,6 +1215,7 @@ def test_runs_and_stores_choice_config_option( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=transpiler_repository, ) @@ -1238,6 +1270,7 @@ def test_installer_detects_installed_transpilers( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=mock_repository, ) @@ -1299,6 +1332,7 @@ def mock_factory(self, repository: TranspilerRepository) -> TranspilerInstaller: ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, transpiler_repository=mock_repository, transpiler_installers=(baz_installer.mock_factory, bar_installer.mock_factory), @@ -1376,6 +1410,7 @@ def install(self, artifact: Path | None = None) -> bool: ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, 
transpiler_repository=mock_repository, transpiler_installers=(MockTranspilerInstaller,), @@ -1434,6 +1469,7 @@ def test_no_reconfigure_if_noninteractive( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, is_interactive=False, ) @@ -1467,6 +1503,7 @@ def test_no_configure_if_noninteractive( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, is_interactive=False, ) @@ -1507,6 +1544,7 @@ def test_transpiler_installers_llm_flag( ctx.install_state, ctx.product_info, ctx.resource_configurator, + MagicMock(), ctx.workspace_installation, is_interactive=False, **kw_args,