Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
.venv/
.venv
.python-version
.sdkmanrc
.DS_Store
.DS_Store/
*.pyc
__pycache__
dist
.idea
.idea/
/htmlcov/
*.iml
target/
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ dependencies = [
"pygls~=2.0.0",
"duckdb~=1.2.2",
"databricks-switch-plugin~=0.1.5", # Temporary, until Switch is migrated to be a transpiler (LSP) plugin.
"requests>=2.28.1,<3" # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts.

"requests>=2.28.1,<3", # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts.
"oracledb==3.4.0"
]

[project.urls]
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/lakebridge/assessments/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

# Maps a source platform name to the path of its assessment pipeline_config.yml.
PLATFORM_TO_SOURCE_TECHNOLOGY_CFG = {
"synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml",
"oracle": "src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml",
}

# TODO: replace this list with PLATFORM_TO_SOURCE_TECHNOLOGY_CFG.keys() once all platforms are supported.
# NOTE(review): two consecutive assignments are shown here; the second one is the value in effect.
# NOTE(review): "mssql" is listed below but has no entry in PLATFORM_TO_SOURCE_TECHNOLOGY_CFG above -
# confirm that nothing looks up a pipeline config for it.
PROFILER_SOURCE_SYSTEM = ["synapse"]
PROFILER_SOURCE_SYSTEM = ["mssql", "synapse","oracle"]


# This flag indicates whether a connector is required for the source system when pipeline is trigger
Expand All @@ -18,4 +19,5 @@
# Keyed by source-system name; each value is a boolean flag.
CONNECTOR_REQUIRED = {
"synapse": False,
"mssql": True,
"oracle": True,
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,37 @@ def run(self):
self._test_connection(source, cred_manager)
logger.info(f"{source.capitalize()} Assessment Configuration Completed")

class ConfigureOracleAssessment(AssessmentConfigurator):
    """Oracle specific assessment configuration."""

    def _configure_credentials(self) -> str:
        """Prompt for Oracle connection details and save them to the credential file.

        The user is asked, in order, for the secret vault type, host, TNS
        listener port, TNS service name, user name and password. The answers
        are written to ``self._credential_file`` under the source name.

        Returns:
            The source name the credentials were stored under.
        """
        cred_file = self._credential_file
        source = self._source_name

        logger.info(
            "\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
            "from environment variables fall back to plain text if not variable is not found\n",
        )
        secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower()

        logger.info("Please refer to the documentation to understand the difference between local and env.")

        # Prompts are issued one by one so the order the user sees them in is explicit.
        host = self.prompts.question("Enter the host details (Server name, IP address, SCAN Name)")
        # The prompt API takes its default as text; the answer is converted to an int afterwards.
        tns_port = int(self.prompts.question("Enter the TNS Listener port number", default="1521", valid_number=True))
        tns_service = self.prompts.question(
            "Enter the TNS service name as registered in the Oracle listener", default="orcl"
        )
        user = self.prompts.question("Enter user name with system privileges", default="SYSTEM")
        password = self.prompts.password("Enter user password")

        credential = {
            "secret_vault_type": secret_vault_type,
            # No vault name is collected here; the key is written with a None value.
            "secret_vault_name": None,
            source: {
                "host": host,
                "tnsPort": tns_port,
                "tnsService": tns_service,
                "user": user,
                "password": password,
            },
        }

        _save_to_disk(credential, cred_file)
        logger.info(f"Credential template created for {source}.")
        return source

class ConfigureSqlServerAssessment(AssessmentConfigurator):
"""SQL Server specific assessment configuration."""
Expand Down Expand Up @@ -186,6 +217,7 @@ def create_assessment_configurator(
configurators = {
"mssql": ConfigureSqlServerAssessment,
"synapse": ConfigureSynapseAssessment,
"oracle": ConfigureOracleAssessment,
}

if source_system not in configurators:
Expand Down
25 changes: 20 additions & 5 deletions src/databricks/labs/lakebridge/connections/database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from sqlalchemy.orm.session import Session

logger = logging.getLogger(__name__)
logger.setLevel("INFO")

logger.setLevel(logging.INFO)

@dataclasses.dataclass
class FetchResult:
Expand All @@ -30,7 +29,6 @@ def _connect(self) -> Engine:
def fetch(self, query: str) -> FetchResult:
pass


class _BaseConnector(DatabaseConnector):
def __init__(self, config: dict[str, Any]):
self.config = config
Expand All @@ -53,6 +51,7 @@ def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector
"snowflake": SnowflakeConnector,
"mssql": MSSQLConnector,
"tsql": MSSQLConnector,
"oracle": OracleConnector,
}

connector_class = connectors.get(db_type.lower())
Expand All @@ -66,8 +65,6 @@ def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector
class SnowflakeConnector(_BaseConnector):
    """Placeholder connector for Snowflake; connecting is not implemented."""

    def _connect(self) -> Engine:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError("Snowflake connector not implemented")


class MSSQLConnector(_BaseConnector):
def _connect(self) -> Engine:
auth_type = self.config.get('auth_type', 'sql_authentication')
Expand Down Expand Up @@ -98,9 +95,25 @@ def _connect(self) -> Engine:
return create_engine(connection_string)


class OracleConnector(_BaseConnector):
    """Connector for Oracle databases using the ``oracle+oracledb`` SQLAlchemy driver."""

    def _connect(self) -> Engine:
        """Build a SQLAlchemy engine from the Oracle settings in ``self.config``.

        Reads ``user``, ``password`` and ``host`` (required), ``tnsPort``
        (defaults to 1521) and ``tnsService`` (optional).

        Raises:
            KeyError: if ``user``, ``password`` or ``host`` is missing.
        """
        service_name = self.config.get('tnsService')
        # SQLAlchemy's Oracle dialects interpret the URL's database component as a SID.
        # A listener service name has to be supplied through the ``service_name`` query option.
        connection_string = URL.create(
            drivername="oracle+oracledb",
            username=self.config['user'],
            password=self.config['password'],
            host=self.config['host'],
            port=self.config.get('tnsPort', 1521),
            query={"service_name": service_name} if service_name else {},
        )

        return create_engine(connection_string)

class DatabaseManager:
def __init__(self, db_type: str, config: dict[str, Any]):
    """Create the connector for ``db_type`` and keep the database type for later use."""
    self.connector = _create_connector(db_type, config)
    self._db_type = db_type

def fetch(self, query: str) -> FetchResult:
try:
Expand All @@ -111,6 +124,8 @@ def fetch(self, query: str) -> FetchResult:

def check_connection(self) -> bool:
    """Run a trivial probe query to verify that the database answers.

    Oracle releases before 23c reject a SELECT without a FROM clause, so the
    probe selects from ``dual`` when the database type is Oracle.

    Returns False when ``fetch`` yields no result.
    """
    query = "SELECT 101 AS test_column"
    # Compare in lower case so "Oracle", "ORACLE" and "oracle" all take this branch.
    if self._db_type.lower() == "oracle":
        query = "SELECT 101 AS test_column FROM dual"
    result = self.fetch(query)
    if result is None:
        return False
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Has to be executed on CDB
-- One row per container (name, open mode, PDB count) with the ids of the instances reporting it.
-- The id list is ordered by inst_id so the output is deterministic; name is constant within a group.

select listagg(inst_id, ',') within group (order by inst_id) as inst_ids,name,open_mode,pdb_count from gv$containers group by name,open_mode,pdb_count

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- Has to be executed on CDB
-- Summarises the database as name/value rows: version, instance count, PDB count,
-- PDB name list, and CPU counters from gv$osstat (cluster-wide sums and per-instance values).
with t as (select version from product_component_version where product like 'Oracle Database%'),
inst_cnt as (select count(*) as cnt from gv$instance),
-- PDB$SEED is excluded from both the PDB count and the PDB name list.
pdb_cnt as (select count(distinct con_id ) as cnt from gv$pdbs where name!='PDB$SEED'),
pdb_namelist as (select listagg(name,',') within group (order by name) as pdbnames from (select distinct name from gv$pdbs where name !='PDB$SEED')),
-- select listagg(distinct name,',') as pdbnames from gv$pdbs where name !='PDB$SEED'),
-- CPU counters summed over all instances, one row per stat_name.
cpu_cores_global as (select 'CLUSTER' as scope, null , stat_name,'CPU GLOBAL (Cluster): '||stat_name as detailed_stat_name,sum(to_number(value)) as value
from gv$osstat
where stat_name in ('NUM_CPUS','NUM_CPU_CORES','NUM_CPU_SOCKETS')
group by stat_name),
-- The same CPU counters, one row per instance and stat_name.
cpu_cores_details as (select 'INSTANCE' as scope,inst_id, stat_name,'CPU per Instance Id: '||inst_id||' - '||stat_name as detailed_stat_name, to_number(value) as value
from gv$osstat
where stat_name in ('NUM_CPUS','NUM_CPU_CORES','NUM_CPU_SOCKETS')
order by inst_id,stat_name)
-- Every branch returns (scope, inst_id, stat_name, name, value); values are converted with
-- to_char so all branches share one column type.
select * from
(
select null as scope, null as inst_id, null as stat_name, 'VERSION' as name,to_char(t.version) as value from t
union
select null as scope, null as inst_id, null as stat_name, 'INSTANCE COUNT', to_char(inst_cnt.cnt) from inst_cnt
union
select null as scope, null as inst_id, null as stat_name, 'PDB COUNT',to_char(pdb_cnt.cnt) from pdb_cnt
union
select null as scope, null as inst_id, null as stat_name, 'PDB LIST', to_char(pdb_namelist.pdbnames) from pdb_namelist
union
select scope, null as inst_id, stat_name,detailed_stat_name, to_char(value) from cpu_cores_global
union
select scope, inst_id, stat_name, detailed_stat_name, to_char(value) from cpu_cores_details
)
order by name
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Has to be executed on CDB
-- spool results/config_instance.csv
-- One row per instance: instance id, name, version and database type.
select inst_id,instance_name,version,database_type from gv$instance

Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- History of memory-related initialisation parameters per container, instance and snapshot time.
-- Rows whose con_id has no match in v$containers are kept by the outer join and labelled
-- 'Entire CDB/Non CDB'.
-- NOTE(review): snapshots are matched on snap_id and instance_number only - confirm that
-- cdb_hist_snapshot cannot return the same pair for several containers or DBIDs, otherwise
-- parameter rows are duplicated.
select NVL(con.name,'Entire CDB/Non CDB') con_name,
param.instance_number,
to_char(snap.snap_time,'yyyy-mm-dd HH24:MI:SS') as snap_time,
parameter_name,
value
from cdb_hist_parameter param,
(select con_id,name from v$containers) con,
(select con_id,snap_id,instance_number,begin_interval_time snap_time from cdb_hist_snapshot) snap
where 1=1
and param.snap_id=snap.snap_id
and param.con_id=con.con_id(+)
and param.instance_number=snap.instance_number
and param.parameter_name in ('sga_target',
'pga_aggregate_target',
'db_cache_size',
'shared_pool_size',
'large_pool_size',
'java_pool_size',
'streams_pool_size',
'db_16k_cache_size',
'db_2k_cache_size',
'db_32k_cache_size',
'db_4k_cache_size',
'db_8k_cache_size',
'memory_target',
'memory_max_target')
-- Sorted by container name, then snapshot time, then instance number.
order by 1,3,2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Has to be executed on CDB
-- Counts objects per container, owner and object type, keeping only owners that
-- cdb_users reports with oracle_maintained='N'.

SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,COUNT(*) as CNT
FROM CDB_OBJECTS o,
(select distinct con_id as con_id, name from gv$containers) cont
WHERE o.CON_ID=cont.con_id
-- The unqualified con_id in the subquery is the cdb_users column, so users are
-- looked up in the same container as the object.
AND OWNER in (select username from cdb_users where oracle_maintained='N' and cont.con_id=con_id)
GROUP BY cont.name,OWNER,OBJECT_TYPE
ORDER BY 1,2

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Has to be executed on CDB
-- Counts tables, indexes and LOBs per container and owner, split into partitioned and
-- non-partitioned, keeping only owners that are not Oracle-maintained in that container.
SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,CNT
from
(
select owner,con_id,'TABLE (NON PARTITIONED)' as OBJECT_TYPE,count(*) as cnt from cdb_tables where partitioned='NO' group by owner,con_id
union
select owner,con_id,'TABLE (PARTITIONED)',count(*) as cnt from cdb_tables where partitioned='YES' group by owner,con_id
union
select owner,con_id,'INDEX (NON PARTITIONED)',count(*) as cnt from cdb_indexes where partitioned='NO' group by owner,con_id
union
select owner,con_id,'INDEX (PARTITIONED)',count(*) as cnt from cdb_indexes where partitioned='YES' group by owner,con_id
union
select owner,con_id,'LOBS (NON PARTITIONED)',count(*) as cnt from cdb_lobs where partitioned='NO' group by owner,con_id
union
select owner,con_id,'LOBS (PARTITIONED)',count(*) as cnt from cdb_lobs where partitioned='YES' group by owner,con_id
) u,
(select distinct con_id as con_id, name from gv$containers) cont
WHERE u.CON_ID=cont.con_id
-- cdb_users is aliased and correlated on its own con_id; comparing only the two outer
-- con_id columns would accept a user name from any container.
AND u.OWNER in (select usr.username from cdb_users usr where usr.oracle_maintained='N' and usr.con_id=u.con_id)
ORDER BY 1,2

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- Allocated, free and maximum size in GB per container and tablespace type
-- (SYSTEM, UNDO or USER_DATA).
select con_name,
-- sub.tablespace_name,
case
when tablespace_name in ('SYSTEM','SYSAUX') then 'SYSTEM'
when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='UNDO' and con_id=sub.con_id) then 'UNDO'
-- when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='TEMPORARY' and con_id=sub.con_id) then 'TEMP'
ELSE 'USER_DATA'
end as tablespace_type,
sum(gb) gb,
sum(freegb) freegb,
sum(maxgb) maxgb
from
(
select c.name as con_name,c.con_id,
f.tablespace_name,
f.bytes/1024/1024 mb, f.bytes/1024/1024/1024 gb,
t.free_bytes/1024/1024 freemb, t.free_bytes/1024/1024/1024 freegb,
f.maxbytes/1024/1024 maxmb, f.maxbytes/1024/1024/1024 maxgb
from
-- Data files are aggregated per tablespace before the join. Joining file-level rows to the
-- per-tablespace free total would add that total once per data file in the outer sum.
(select con_id,tablespace_name,sum(bytes) as bytes,sum(maxbytes) as maxbytes from cdb_data_files group by con_id,tablespace_name) f,
(select con_id,tablespace_name,sum(bytes) free_bytes from cdb_free_space group by con_id,tablespace_name ) t,
(select distinct con_id,name from gv$containers) c
-- NOTE(review): the inner join drops tablespaces that have no row in cdb_free_space -
-- confirm whether completely full tablespaces should still be reported.
where 1=1
and t.con_id=f.con_id
and t.con_id=c.con_id
and t.tablespace_name=f.tablespace_name
) sub
group by con_name,
--tablespace_name
case
when tablespace_name in ('SYSTEM','SYSAUX') then 'SYSTEM'
when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='UNDO' and con_id=sub.con_id) then 'UNDO'
-- when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='TEMPORARY' and con_id=sub.con_id) then 'TEMP'
ELSE 'USER_DATA'
end
order by 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Has to be executed on CDB
-- Wait time per container, instance, hour, event and wait class, derived from the
-- active session history; samples without an event are reported as 'ON CPU'.

select cont.name as pdb_name,
ash.instance_number,
ash.mtime,
ash.event,
ash.wait_class,
ash.total_wait_time
from (SELECT instance_number,con_id,CON_DBID,
TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime,
NVL(a.event, 'ON CPU') AS event,
NVL(a.wait_class, 'ON CPU') AS wait_class,
-- presumably each retained history sample stands for 10 seconds - TODO confirm
COUNT(*)*10 AS total_wait_time
FROM cdb_hist_active_sess_history a
GROUP BY instance_number,
con_id, CON_DBID,
TO_CHAR(sample_time,'YYYY-MM-DD HH24'),
a.event,
a.wait_class
) ash,
(select distinct con_id,name,dbid from gv$containers) cont
-- Containers are matched on both con_id and dbid.
where cont.con_id=ash.con_id
and cont.dbid=ash.CON_DBID
ORDER BY pdb_name,mtime

Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- Distinct foreground sessions per container, instance, user and minute, taken from the
-- active session history. PDB$SEED and the listed Oracle-supplied accounts are excluded.
select con.name,
sh.instance_number,u.username,
to_char(sh.sample_time,'YYYY-MM-DD HH24:MI') as snap_time,
count(distinct sh.session_id||','||sh.session_serial#) as foregd_session_cnt
from cdb_hist_active_sess_history sh,
(
select con_id, dbid, name
from v$containers
where name != 'PDB$SEED'
) con,
-- con_id is carried along because user_id values are assigned per container.
(select distinct con_id,user_id,username
from cdb_users
where username not in ('SYS','SYSTEM','XS$NULL',
'OJVMSYS','LBACSYS','OUTLN','SYS$UMF','DBSNMP',
'APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS',
'DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS',
'XDB','WMSYS','GSMCATUSER','MDDATA','REMOTE_SCHEDULER_AGENT',
'SYSBACKUP','GSMUSER','GSMROOTUSER','SYSRAC',
'SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM',
'ORACLE_OCM','ORDSYS','SYSDG')
) u
where sh.con_id = con.con_id
-- and sh.dbid=con.dbid
-- Users are matched within the session's container; matching on user_id alone would pair a
-- session with same-numbered users from other containers.
and u.con_id=sh.con_id
and u.user_id=sh.user_id
and sh.session_type = 'FOREGROUND'
group by con.name,
sh.instance_number,
to_char(sh.sample_time,'YYYY-MM-DD HH24:MI'),
u.username
order by 1, 4, 2
Loading
Loading