Run ruff format and check

ghodsizadeh · Jan 5, 2025 · f1f92aa
1 parent ac7ad75
commit f1f92aa
Show file tree

Hide file tree

Showing 6 changed files with 56 additions and 32 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,10 +1,23 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
-    hooks:
-    -   id: trailing-whitespace
-    -   id: end-of-file-fixer
-    -   id: check-yaml
-    -   id: check-added-large-files
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v3.2.0
+      hooks:
+          - id: trailing-whitespace
+          - id: end-of-file-fixer
+          - id: check-yaml
+          - id: check-added-large-files
+
+    - repo: local
+      hooks:
+          - id: ruff-format
+            name: ruff-format
+            entry: uv run ruff format
+            language: python
+            types_or: [ python, pyi ]
+          - id: ruff-lint
+            name: ruff-lint
+            entry: uv run ruff check
+            language: python
+            types_or: [ python, pyi ]
diff --git a/README.md b/README.md
@@ -71,4 +71,4 @@ for df in dfs:
 
 ## TODO:
 - [ ] Convert datatype to numeric
-- [ ]
+- [ ]
diff --git a/pdf2csv/cli.py b/pdf2csv/cli.py
@@ -11,25 +11,24 @@
 def convert_cli(
     pdf_path: str = typer.Argument(..., help="Path to the input PDF file."),
     output_dir: Optional[str] = typer.Option(
-        '.', "--output-dir", "-o", help="Directory to save output files."
+        ".", "--output-dir", "-o", help="Directory to save output files."
     ),
     rtl: bool = typer.Option(
-        False, 
+        False,
         "--rtl/--no-rtl",
         help="Whether to reverse text for right-to-left format (default=False). "
-             "Use '--rtl' to reverse the text."
+        "Use '--rtl' to reverse the text.",
     ),
-    output_format: Literal['csv', 'xlsx'] = typer.Option(  # Update output_format parameter
-        'csv', 
-        "--output-format", 
-        "-f", 
-        help="Format to save the output files. Options are 'csv' and 'xlsx'. Defaults to 'csv'."
+    output_format: Literal[
+        "csv", "xlsx"
+    ] = typer.Option(  # Update output_format parameter
+        "csv",
+        "--output-format",
+        "-f",
+        help="Format to save the output files. Options are 'csv' and 'xlsx'. Defaults to 'csv'.",
     ),
     verbose: bool = typer.Option(
-        False, 
-        "--verbose", 
-        "-v", 
-        help="Enable verbose (DEBUG) logging."
+        False, "--verbose", "-v", help="Enable verbose (DEBUG) logging."
     ),
 ):
     """
@@ -41,10 +40,18 @@ def convert_cli(
     else:
         logging.basicConfig(level=logging.INFO)
 
-    logging.info(f"Starting conversion for {pdf_path}, rtl={rtl}, output_format={output_format} ...")
+    logging.info(
+        f"Starting conversion for {pdf_path}, rtl={rtl}, output_format={output_format} ..."
+    )
 
     try:
-        dfs = convert(pdf_path, output_dir=output_dir, rtl=rtl, output_format=output_format, index=False)
+        dfs = convert(
+            pdf_path,
+            output_dir=output_dir,
+            rtl=rtl,
+            output_format=output_format,
+            index=False,
+        )
         logging.info(f"Extracted {len(dfs)} table(s) from {pdf_path}.")
     except FileNotFoundError as fnf_err:
         logging.error(str(fnf_err))

diff --git a/pdf2csv/converter.py b/pdf2csv/converter.py
@@ -14,7 +14,7 @@ def convert(
     pdf_path: str,
     output_dir: Optional[str] = None,
     rtl: bool = False,
-    output_format: Literal['csv', 'xlsx'] = 'csv',  # Use Literal for type checking
+    output_format: Literal["csv", "xlsx"] = "csv",  # Use Literal for type checking
     **kwargs: Any,
 ) -> List[pd.DataFrame]:
     """
@@ -110,12 +110,16 @@ def convert(
 
             # Optionally save to CSV or XLSX
             if output_dir_path is not None:
-                if output_format == 'csv':
-                    csv_filename = output_dir_path / f"{doc_filename}-table-{table_idx}.csv"
+                if output_format == "csv":
+                    csv_filename = (
+                        output_dir_path / f"{doc_filename}-table-{table_idx}.csv"
+                    )
                     df.to_csv(csv_filename, **kwargs)
                     _log.info(f"Saved CSV table #{table_idx} to: {csv_filename}")
-                elif output_format == 'xlsx':
-                    xlsx_filename = output_dir_path / f"{doc_filename}-table-{table_idx}.xlsx"
+                elif output_format == "xlsx":
+                    xlsx_filename = (
+                        output_dir_path / f"{doc_filename}-table-{table_idx}.xlsx"
+                    )
                     df.to_excel(xlsx_filename, **kwargs)
                     _log.info(f"Saved XLSX table #{table_idx} to: {xlsx_filename}")
                 else:

diff --git a/pdf2csv/helpers.py b/pdf2csv/helpers.py
@@ -1,6 +1,7 @@
 import pandas as pd
 
-def ensure_numeric_columns(df: pd.DataFrame, errors: str = 'coerce') -> pd.DataFrame:
+
+def ensure_numeric_columns(df: pd.DataFrame, errors: str = "coerce") -> pd.DataFrame:
     """
     Ensure all columns which are numbers are considered as numeric.
 
@@ -18,5 +19,5 @@ def ensure_numeric_columns(df: pd.DataFrame, errors: str = 'coerce') -> pd.DataF
         The processed DataFrame with numeric columns converted.
     """
     for col in df.columns:
-        df[col] = pd.to_numeric(df[col], errors='ignore')
+        df[col] = pd.to_numeric(df[col], errors="ignore")
     return df
diff --git a/tests/test_convertor.py b/tests/test_convertor.py
@@ -1,6 +1,7 @@
 from pdf2csv import convert
 import pytest
 
+
 @pytest.fixture
 def rtl_pdf():
     return "tests/assets/rtl_test.pdf"
@@ -11,9 +12,7 @@ def test_rtl_convert(rtl_pdf):
     dfs = convert(pdf_path, rtl=True)
     assert len(dfs) == 1
     df = dfs[0]
-    df.columns[0] == 'بلندمدت  ميانگين.اختالف نسبت به  درصد'
+    df.columns[0] == "بلندمدت  ميانگين.اختالف نسبت به  درصد"
     assert len(df.columns) == 6
     assert len(df) == 10
     assert df.iloc[0, 0] == -44
-
-
diff --git a/README.md b/README.md
@@ -71,4 +71,4 @@ for df in dfs:
     ## TODO:
     - [ ] Convert datatype to numeric
-    - [ ]
+    - [ ]