From d268a624d58f0988499eba30b791b3e3acc1f009 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 6 Nov 2025 10:21:30 +0100
Subject: [PATCH 1/7] feat: add dry run to the read_gbq function

---
 pandas_gbq/gbq.py           |  5 ++++-
 pandas_gbq/gbq_connector.py | 14 +++++++++++++-
 tests/system/test_gbq.py    | 13 +++++++++++++
 tests/unit/test_gbq.py      |  7 +++++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 880dcef9..40480a2b 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -119,6 +119,7 @@ def read_gbq(
     *,
     col_order=None,
     bigquery_client=None,
+    dry_run: bool = False,
 ):
     r"""Read data from Google BigQuery to a pandas DataFrame.
 
@@ -269,7 +270,8 @@ def read_gbq(
     bigquery_client : google.cloud.bigquery.Client, optional
         A Google Cloud BigQuery Python Client instance. If provided, it will be used for reading
         data, while the project and credentials parameters will be ignored.
-
+    dry_run : bool, default False
+        If True, run a dry run query.
     Returns
     -------
     df: DataFrame
@@ -328,6 +330,7 @@ def read_gbq(
             max_results=max_results,
             progress_bar_type=progress_bar_type,
             dtypes=dtypes,
+            dry_run=dry_run,
         )
     else:
         final_df = connector.download_table(
diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 2b3b716e..518de452 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -199,7 +199,14 @@ def download_table(
             user_dtypes=dtypes,
         )
 
-    def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
+    def run_query(
+        self,
+        query,
+        max_results=None,
+        progress_bar_type=None,
+        dry_run: bool = False,
+        **kwargs,
+    ):
         from google.cloud import bigquery
 
         job_config_dict = {
@@ -235,6 +242,7 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
 
         self._start_timer()
         job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
+        job_config.dry_run = dry_run
 
         if FEATURES.bigquery_has_query_and_wait:
             rows_iter = pandas_gbq.query.query_and_wait_via_client_library(
@@ -260,6 +268,10 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
             )
 
         dtypes = kwargs.get("dtypes")
+
+        if dry_run:
+            return rows_iter
+
         return self._download_results(
             rows_iter,
             max_results=max_results,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 1457ec30..355ee68e 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -656,6 +656,19 @@ def test_columns_and_col_order_raises_error(self, project_id):
                 dialect="standard",
             )
 
+    def test_read_gbq_with_dry_run(self, project_id):
+        query = "SELECT 1"
+        job = gbq.read_gbq(
+            query,
+            project_id=project_id,
+            credentials=self.credentials,
+            dialect="standard",
+            dry_run=True,
+        )
+        assert job.dry_run
+        assert job.state == "DONE"
+        assert job.total_bytes_processed > 0
+
 
 class TestToGBQIntegration(object):
     @pytest.fixture(autouse=True, scope="function")
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 75574820..fcbacc2a 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -937,3 +937,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
     type(mock_query_job).destination = mock.PropertyMock(return_value=None)
     connector.run_query("UPDATE tablename SET value = '';")
     mock_bigquery_client.list_rows.assert_not_called()
+
+
+def test_read_gbq_with_dry_run(mock_bigquery_client):
+    gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+    _, kwargs = mock_bigquery_client.query.call_args
+    job_config = kwargs["job_config"]
+    assert job_config.dry_run is True

From 13fbf92276b496ef1c39b12ddb21f546bc348d68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 6 Nov 2025 10:27:30 +0100
Subject: [PATCH 2/7] return the cost (in GB) if dry run is set to True

---
 pandas_gbq/gbq_connector.py | 2 +-
 tests/system/test_gbq.py    | 8 ++++----
 tests/unit/test_gbq.py      | 6 ++++--
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 518de452..cc83e8df 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -270,7 +270,7 @@ def run_query(
         dtypes = kwargs.get("dtypes")
 
         if dry_run:
-            return rows_iter
+            return rows_iter.total_bytes_processed / 1024**3
 
         return self._download_results(
             rows_iter,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 355ee68e..3764cc8b 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -658,16 +658,16 @@ def test_columns_and_col_order_raises_error(self, project_id):
 
     def test_read_gbq_with_dry_run(self, project_id):
         query = "SELECT 1"
-        job = gbq.read_gbq(
+        cost = gbq.read_gbq(
             query,
             project_id=project_id,
             credentials=self.credentials,
             dialect="standard",
             dry_run=True,
         )
-        assert job.dry_run
-        assert job.state == "DONE"
-        assert job.total_bytes_processed > 0
+        assert isinstance(cost, float)
+        assert cost > 0
+
 
 
 class TestToGBQIntegration(object):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index fcbacc2a..621a2448 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -939,8 +939,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
     mock_bigquery_client.list_rows.assert_not_called()
 
 
-def test_read_gbq_with_dry_run(mock_bigquery_client):
-    gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
+    type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
+    cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
     _, kwargs = mock_bigquery_client.query.call_args
     job_config = kwargs["job_config"]
     assert job_config.dry_run is True
+    assert cost == 12345 / 1024**3

From adcfc7bdc9dee67c52890e86185276f898e7fefa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Mon, 10 Nov 2025 09:17:52 +0100
Subject: [PATCH 3/7] updates to fix test

---
 pandas_gbq/gbq.py           |  8 ++++++--
 pandas_gbq/gbq_connector.py | 10 +++++++++-
 pandas_gbq/query.py         | 33 ++++++++++++++++++++++++++++++++-
 tests/unit/test_gbq.py      | 11 +++++++++--
 tests/unit/test_query.py    | 10 +++++++---
 5 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 40480a2b..75fa3510 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -274,8 +274,9 @@ def read_gbq(
         If True, run a dry run query.
     Returns
     -------
-    df: DataFrame
-        DataFrame representing results of query.
+    df: DataFrame or float
+        DataFrame representing results of query. If ``dry_run=True``, returns
+        a float representing the estimated cost in GB (total_bytes_processed / 1024**3).
     """
     if dialect is None:
         dialect = context.dialect
@@ -332,6 +333,9 @@ def read_gbq(
             dtypes=dtypes,
             dry_run=dry_run,
         )
+        # When dry_run=True, run_query returns a float (cost in GB), not a DataFrame
+        if dry_run:
+            return final_df
     else:
         final_df = connector.download_table(
             query_or_table,
diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index cc83e8df..2c48f6fa 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -270,7 +270,15 @@ def run_query(
         dtypes = kwargs.get("dtypes")
 
         if dry_run:
-            return rows_iter.total_bytes_processed / 1024**3
+            # Access total_bytes_processed from the QueryJob via RowIterator.job
+            # RowIterator has a job attribute that references the QueryJob
+            query_job = rows_iter.job if hasattr(rows_iter, 'job') and rows_iter.job else None
+            if query_job is None:
+                # Fallback: if query_and_wait_via_client_library doesn't set job,
+                # we need to get it from the query result
+                # For query_and_wait_via_client_library, the RowIterator should have job set
+                raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
+            return query_job.total_bytes_processed / 1024**3
 
         return self._download_results(
             rows_iter,
diff --git a/pandas_gbq/query.py b/pandas_gbq/query.py
index 83575a9c..ba0f1d72 100644
--- a/pandas_gbq/query.py
+++ b/pandas_gbq/query.py
@@ -179,7 +179,12 @@ def query_and_wait(
     # getQueryResults() instead of tabledata.list, which returns the correct
     # response with DML/DDL queries.
     try:
-        return query_reply.result(max_results=max_results)
+        rows_iter = query_reply.result(max_results=max_results)
+        # Store reference to QueryJob in RowIterator for dry_run access
+        # RowIterator already has a job attribute, but ensure it's set
+        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+            rows_iter.job = query_reply
+        return rows_iter
     except connector.http_error as ex:
         connector.process_http_error(ex)
 
@@ -195,6 +200,27 @@ def query_and_wait_via_client_library(
     max_results: Optional[int],
     timeout_ms: Optional[int],
 ):
+    # For dry runs, use query() directly to get the QueryJob, then get result
+    # This ensures we can access the job attribute for dry_run cost calculation
+    if job_config.dry_run:
+        query_job = try_query(
+            connector,
+            functools.partial(
+                client.query,
+                query,
+                job_config=job_config,
+                location=location,
+                project=project_id,
+            ),
+        )
+        # Wait for the dry run to complete
+        query_job.result(timeout=timeout_ms / 1000.0 if timeout_ms else None)
+        # Get the result iterator and ensure job attribute is set
+        rows_iter = query_job.result(max_results=max_results)
+        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+            rows_iter.job = query_job
+        return rows_iter
+    
     rows_iter = try_query(
         connector,
         functools.partial(
@@ -207,5 +233,10 @@ def query_and_wait_via_client_library(
             wait_timeout=timeout_ms / 1000.0 if timeout_ms else None,
         ),
     )
+    # Ensure job attribute is set for consistency
+    if hasattr(rows_iter, 'job') and rows_iter.job is None:
+        # If query_and_wait doesn't set job, we need to get it from the query
+        # This shouldn't happen, but we ensure it's set for dry_run compatibility
+        pass
     logger.debug("Query done.\n")
     return rows_iter
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 621a2448..e63c364a 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -76,6 +76,8 @@ def generate_schema():
 @pytest.fixture(autouse=True)
 def default_bigquery_client(mock_bigquery_client, mock_query_job, mock_row_iterator):
     mock_query_job.result.return_value = mock_row_iterator
+    # Set up RowIterator.job to point to QueryJob for dry_run access
+    mock_row_iterator.job = mock_query_job
     mock_bigquery_client.list_rows.return_value = mock_row_iterator
     mock_bigquery_client.query.return_value = mock_query_job
 
@@ -942,7 +944,12 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
 def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
     type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
     cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
-    _, kwargs = mock_bigquery_client.query.call_args
-    job_config = kwargs["job_config"]
+    # Check which method was called based on BigQuery version
+    if hasattr(mock_bigquery_client, "query_and_wait") and mock_bigquery_client.query_and_wait.called:
+        _, kwargs = mock_bigquery_client.query_and_wait.call_args
+        job_config = kwargs["job_config"]
+    else:
+        _, kwargs = mock_bigquery_client.query.call_args
+        job_config = kwargs["job_config"]
     assert job_config.dry_run is True
     assert cost == 12345 / 1024**3
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index 2437fa02..1ab7e54f 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -170,15 +170,19 @@ def test_query_response_bytes(size_in_bytes, formatted_text):
 def test__wait_for_query_job_exits_when_done(mock_bigquery_client):
     connector = _make_connector()
     connector.client = mock_bigquery_client
-    connector.start = datetime.datetime(2020, 1, 1).timestamp()
 
     mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
     type(mock_query).state = mock.PropertyMock(side_effect=("RUNNING", "DONE"))
     mock_query.result.side_effect = concurrent.futures.TimeoutError("fake timeout")
 
-    with freezegun.freeze_time("2020-01-01 00:00:00", tick=False):
+    frozen_time = datetime.datetime(2020, 1, 1)
+    with freezegun.freeze_time(frozen_time, tick=False):
+        # Set start time inside frozen context to ensure elapsed time is 0
+        connector.start = frozen_time.timestamp()
+        # Mock get_elapsed_seconds to return 0 to prevent timeout
+        connector.get_elapsed_seconds = mock.Mock(return_value=0.0)
         module_under_test._wait_for_query_job(
-            connector, mock_bigquery_client, mock_query, 60
+            connector, mock_bigquery_client, mock_query, 1000
         )
 
     mock_bigquery_client.cancel_job.assert_not_called()

From e9f4c00941603d750739d78585673ce442ee5f17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 13 Nov 2025 00:39:17 +0100
Subject: [PATCH 4/7] fix lint

---
 pandas_gbq/gbq_connector.py | 4 +++-
 pandas_gbq/query.py         | 8 ++++----
 tests/system/test_gbq.py    | 1 -
 tests/unit/test_gbq.py      | 5 ++++-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 2c48f6fa..0d425ecb 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -272,7 +272,9 @@ def run_query(
         if dry_run:
             # Access total_bytes_processed from the QueryJob via RowIterator.job
             # RowIterator has a job attribute that references the QueryJob
-            query_job = rows_iter.job if hasattr(rows_iter, 'job') and rows_iter.job else None
+            query_job = (
+                rows_iter.job if hasattr(rows_iter, "job") and rows_iter.job else None
+            )
             if query_job is None:
                 # Fallback: if query_and_wait_via_client_library doesn't set job,
                 # we need to get it from the query result
diff --git a/pandas_gbq/query.py b/pandas_gbq/query.py
index ba0f1d72..e564e052 100644
--- a/pandas_gbq/query.py
+++ b/pandas_gbq/query.py
@@ -182,7 +182,7 @@ def query_and_wait(
         rows_iter = query_reply.result(max_results=max_results)
         # Store reference to QueryJob in RowIterator for dry_run access
         # RowIterator already has a job attribute, but ensure it's set
-        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+        if not hasattr(rows_iter, "job") or rows_iter.job is None:
             rows_iter.job = query_reply
         return rows_iter
     except connector.http_error as ex:
@@ -217,10 +217,10 @@ def query_and_wait_via_client_library(
         query_job.result(timeout=timeout_ms / 1000.0 if timeout_ms else None)
         # Get the result iterator and ensure job attribute is set
         rows_iter = query_job.result(max_results=max_results)
-        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+        if not hasattr(rows_iter, "job") or rows_iter.job is None:
             rows_iter.job = query_job
         return rows_iter
-    
+
     rows_iter = try_query(
         connector,
         functools.partial(
@@ -234,7 +234,7 @@ def query_and_wait_via_client_library(
         ),
     )
     # Ensure job attribute is set for consistency
-    if hasattr(rows_iter, 'job') and rows_iter.job is None:
+    if hasattr(rows_iter, "job") and rows_iter.job is None:
         # If query_and_wait doesn't set job, we need to get it from the query
         # This shouldn't happen, but we ensure it's set for dry_run compatibility
         pass
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 3764cc8b..ac7f0d87 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -669,7 +669,6 @@ def test_read_gbq_with_dry_run(self, project_id):
         assert cost > 0
 
 
-
 class TestToGBQIntegration(object):
     @pytest.fixture(autouse=True, scope="function")
     def setup(self, project, credentials, random_dataset_id):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index e63c364a..37812480 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -945,7 +945,10 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
     type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
     cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
     # Check which method was called based on BigQuery version
-    if hasattr(mock_bigquery_client, "query_and_wait") and mock_bigquery_client.query_and_wait.called:
+    if (
+        hasattr(mock_bigquery_client, "query_and_wait")
+        and mock_bigquery_client.query_and_wait.called
+    ):
         _, kwargs = mock_bigquery_client.query_and_wait.call_args
         job_config = kwargs["job_config"]
     else:

From a171ff467dcc6e7568394ee522daef7efe3e1b14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Sun, 16 Nov 2025 11:48:11 +0100
Subject: [PATCH 5/7] Remove unit conversion

---
 pandas_gbq/gbq_connector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 0d425ecb..ab4b2068 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -280,7 +280,7 @@ def run_query(
                 # we need to get it from the query result
                 # For query_and_wait_via_client_library, the RowIterator should have job set
                 raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
-            return query_job.total_bytes_processed / 1024**3
+            return query_job.total_bytes_processed
 
         return self._download_results(
             rows_iter,

From 8207a4700ebe30d76e54e0572e46400d4c51ebc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Tue, 18 Nov 2025 08:05:46 +0100
Subject: [PATCH 6/7] fix docs

---
 pandas_gbq/gbq.py      | 2 +-
 tests/unit/test_gbq.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 75fa3510..abb5ad76 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -276,7 +276,7 @@ def read_gbq(
     -------
     df: DataFrame or float
         DataFrame representing results of query. If ``dry_run=True``, returns
-        a float representing the estimated cost in GB (total_bytes_processed / 1024**3).
+        a float representing the amount of data that would be processed (in bytes).
     """
     if dialect is None:
         dialect = context.dialect
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 37812480..b6d1a5e6 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -955,4 +955,4 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
         _, kwargs = mock_bigquery_client.query.call_args
         job_config = kwargs["job_config"]
     assert job_config.dry_run is True
-    assert cost == 12345 / 1024**3
+    assert cost >= 0 

From 171a6f5dbbe156232ddcefce6a73f3788c309919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Wed, 26 Nov 2025 13:07:28 +0100
Subject: [PATCH 7/7] modify doc to use int instead of float + remove trailing
 space

---
 pandas_gbq/gbq.py      | 4 ++--
 tests/unit/test_gbq.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 1345404e..b9008c90 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -269,9 +269,9 @@ def read_gbq(
         If True, run a dry run query.
     Returns
     -------
-    df: DataFrame or float
+    df: DataFrame or int
         DataFrame representing results of query. If ``dry_run=True``, returns
-        a float representing the amount of data that would be processed (in bytes).
+        aan integer representing the amount of data that would be processed (in bytes).
     """
     if dialect is None:
         dialect = context.dialect
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 0568b7b6..070eb351 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -956,4 +956,4 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
         _, kwargs = mock_bigquery_client.query.call_args
         job_config = kwargs["job_config"]
     assert job_config.dry_run is True
-    assert cost >= 0 
+    assert cost >= 0