From 22e79bd54c61e77b37f1588a159bc812b337932a Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Sun, 11 Aug 2024 12:59:29 -0800 Subject: [PATCH] fix: log DDL SQL in verbose mode Before (and still), we log the SQL inside Backend.compile(). I *think* what this means is that we are logging all SELECT statements. But, any DDL statements like from create_table() were not logged. Now they are. Some SQL statements may be logged twice now, in .compile() and in .raw_sql(), but I don't think that's a big problem? --- ibis/backends/bigquery/__init__.py | 4 +--- ibis/backends/duckdb/__init__.py | 30 ++++++++++++++--------------- ibis/backends/flink/__init__.py | 1 + ibis/backends/impala/__init__.py | 4 ++-- ibis/backends/mssql/__init__.py | 1 + ibis/backends/mysql/__init__.py | 1 + ibis/backends/oracle/__init__.py | 1 + ibis/backends/postgres/__init__.py | 2 +- ibis/backends/pyspark/__init__.py | 1 + ibis/backends/snowflake/__init__.py | 1 + ibis/backends/sqlite/__init__.py | 1 + ibis/backends/trino/__init__.py | 2 +- 12 files changed, 27 insertions(+), 22 deletions(-) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index a1bef8f57f2f..4615444e321a 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -666,6 +666,7 @@ def raw_sql(self, query: str, params=None, page_size: int | None = None): ] with contextlib.suppress(AttributeError): query = query.sql(self.dialect) + self._log(query) job_config = bq.job.QueryJobConfig(query_parameters=query_parameters or []) return self.client.query_and_wait( @@ -737,7 +738,6 @@ def execute(self, expr, params=None, limit="default", **kwargs): schema = expr.as_table().schema() - ibis.schema({"_TABLE_SUFFIX": "string"}) sql = self.compile(expr, limit=limit, params=params, **kwargs) - self._log(sql) query = self.raw_sql(sql, params=params, **kwargs) arrow_t = query.to_arrow( @@ -799,7 +799,6 @@ def to_pyarrow( self._import_pyarrow() self._register_in_memory_tables(expr) sql = self.compile(expr, limit=limit, params=params, **kwargs) - self._log(sql) query = self.raw_sql(sql, params=params, **kwargs) table = query.to_arrow( progress_bar_type=None, bqstorage_client=self.storage_client @@ -822,7 +821,6 @@ def to_pyarrow_batches( self._register_in_memory_tables(expr) sql = self.compile(expr, limit=limit, params=params, **kwargs) - self._log(sql) query = self.raw_sql(sql, params=params, page_size=chunk_size, **kwargs) batch_iter = query.to_arrow_iterable(bqstorage_client=self.storage_client) return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 6341605849b7..06634625b21d 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -91,6 +91,7 @@ def current_database(self) -> str: def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name) + self._log(query) return self.con.execute(query, **kwargs) def create_table( @@ -203,43 +204,42 @@ def create_table( # This is the same table as initial_table unless overwrite == True final_table = sge.Table( this=sg.to_identifier(name, quoted=self.compiler.quoted), - catalog=catalog, - db=database, + catalog=sg.to_identifier(catalog, quoted=self.compiler.quoted), + db=sg.to_identifier(database, quoted=self.compiler.quoted), ) with self._safe_raw_sql(create_stmt) as cur: + + def cur_exec(stmt): + sql = stmt.sql(self.name) + self._log(sql) + return cur.execute(sql) + if query is not None: - insert_stmt = sge.insert(query, into=initial_table).sql(self.name) - cur.execute(insert_stmt).fetchall() + cur_exec(sge.insert(query, into=initial_table)).fetchall() if overwrite: - cur.execute( - sge.Drop(kind="TABLE", this=final_table, exists=True).sql(self.name) - ) + cur_exec(sge.Drop(kind="TABLE", this=final_table, exists=True)) # TODO: This branching should be removed once DuckDB >=0.9.3 is # our lower bound (there's an upstream bug in 0.9.2 that # disallows renaming temp tables) # We should (pending that release) be able to remove the if temp # branch entirely. if temp: - cur.execute( + cur_exec( sge.Create( kind="TABLE", this=final_table, expression=sg.select(STAR).from_(initial_table), properties=sge.Properties(expressions=properties), - ).sql(self.name) - ) - cur.execute( - sge.Drop(kind="TABLE", this=initial_table, exists=True).sql( - self.name ) ) + cur_exec(sge.Drop(kind="TABLE", this=initial_table, exists=True)) else: - cur.execute( + cur_exec( sge.AlterTable( this=initial_table, actions=[sge.RenameTable(this=final_table)], - ).sql(self.name) + ) ) if temp_memtable_view is not None: diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index f959ca1e5f0c..43c9962160ee 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -88,6 +88,7 @@ def disconnect(self) -> None: pass def raw_sql(self, query: str) -> TableResult: + self._log(query) return self._table_env.execute_sql(query) def _get_schema_using_query(self, query: str) -> sch.Schema: diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index e6d8a3e4fc18..6c9a8e98f6e9 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -247,12 +247,12 @@ def raw_sql(self, query: str): try: for k, v in self.options.items(): q = f"SET {k} = {v!r}" - util.log(q) + self._log(q) cursor.execute_async(q) cursor._wait_to_finish() - util.log(query) + self._log(query) cursor.execute_async(query) cursor._wait_to_finish() diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index a1bb4f0b0f09..26eee1b70d88 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -338,6 +338,7 @@ def _safe_ddl(self, query, *args, **kwargs): def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(self.dialect) + self._log(query) con = self.con cursor = con.cursor() diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 7b279fd1326f..18cf485c7d6d 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -275,6 +275,7 @@ def _safe_raw_sql(self, *args, **kwargs): def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name) + self._log(query) con = self.con cursor = con.cursor() diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index c4d43f280ac8..fe9434864bbc 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -219,6 +219,7 @@ def _safe_raw_sql(self, *args, **kwargs): def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name) + self._log(query) con = self.con cursor = con.cursor() diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 616480757336..22d04004ee48 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -747,7 +747,7 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.dialect) - + self._log(query) con = self.con cursor = con.cursor() diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 11c62f290441..d179ea4ced8a 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -410,6 +410,7 @@ def _safe_raw_sql(self, query: str) -> Any: def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.dialect) + self._log(query) return self._session.sql(query, **kwargs) def execute( diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 32350aa9de27..f0b33b7a4447 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -728,6 +728,7 @@ def _safe_raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name) + self._log(query) cur = self.con.cursor() try: cur.execute(query, **kwargs) diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index e770cecd72be..e6485c3f7e6d 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -126,6 +126,7 @@ def _post_connect( def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: if not isinstance(query, str): query = query.sql(dialect=self.name) + self._log(query) return self.con.execute(query, **kwargs) @contextlib.contextmanager diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index f7af4c493649..01b658d119ca 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -57,7 +57,7 @@ def raw_sql(self, query: str | sg.Expression) -> Any: """Execute a raw SQL query.""" with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name, pretty=True) - + self._log(query) con = self.con cur = con.cursor() try: