1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -1021,6 +1021,7 @@ I/O
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
- Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
- Bug in :meth:`read_csv` with ``on_bad_lines="skip"`` and ``chunksize`` where the bad line would not be skipped if it was the first line in the chunk (:issue:`61973`)
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
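A minimal sketch of the scenario described by the new read_csv entry above (illustrative data only, mirroring the regression test added further down; not part of the diff itself):

    from io import StringIO

    import pandas as pd

    # Two good rows, then a malformed row with an extra field, then one more
    # good row.  With chunksize=2 the malformed row is the first line of the
    # second chunk; on_bad_lines="skip" should simply drop it.
    data = "a,b\n1,2\n3,4\n4,5,extra\n6,7"

    with pd.read_csv(StringIO(data), chunksize=2, on_bad_lines="skip") as reader:
        for chunk in reader:
            print(chunk)

Per the whatsnew entry, the malformed line was previously not skipped when it began a chunk; with this change the second chunk should contain only the final good row.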
6 changes: 4 additions & 2 deletions pandas/_libs/src/parser/tokenizer.c
@@ -413,8 +413,10 @@ static int end_line(parser_t *self) {
return 0;
}

if (!(self->lines <= self->header_end + 1) && (fields > ex_fields) &&
!(self->usecols)) {
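// GH#61973: the old check above (self->lines <= self->header_end + 1) treated
// the first line of a new chunk as if it were still within the header rows,
// so a malformed line there was never skipped.  past_headers additionally
// accepts the case of exactly one buffered line with the header ending at
// row 0, so such a line is still checked for extra fields.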
bool past_headers = (self->lines > self->header_end + 1) ||
(self->lines == 1 && self->header_end == 0);

if (past_headers && (fields > ex_fields) && !(self->usecols)) {
// increment file line count
self->file_lines++;

22 changes: 22 additions & 0 deletions pandas/tests/io/parser/common/test_chunksize.py
@@ -381,3 +381,25 @@ def test_chunksize_second_block_shorter(all_parsers):

    for i, result in enumerate(result_chunks):
        tm.assert_frame_equal(result, expected_frames[i])


def test_chunksize_skip_bad_line_with_bad_line_first_in_the_chunk(all_parsers):
    # GH#61973
    parser = all_parsers
    data = "a,b\n1,2\n3\n4,5,extra\n6,7"

    if parser.engine == "pyarrow":
        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
        with pytest.raises(ValueError, match=msg):
            parser.read_csv(StringIO(data), chunksize=2, on_bad_lines="skip")
        return

    result_chunks = parser.read_csv(StringIO(data), chunksize=2, on_bad_lines="skip")

    expected_frames = [
        DataFrame({"a": [1, 3], "b": [2, np.nan]}),
        DataFrame({"a": [6], "b": [7]}, index=[2]),
    ]

    for i, result in enumerate(result_chunks):
        tm.assert_frame_equal(result, expected_frames[i])