aws · kirupang-code · Sep 16, 2025 · Sep 17, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,15 +1,15 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.5.0
+    rev: v4.4.0
     hooks:
     -   id: check-yaml
     -   id: end-of-file-fixer
     -   id: trailing-whitespace
     -   id: detect-aws-credentials
         args: [--allow-missing-credentials]
 
-- repo: https://github.com/humitos/mirrors-autoflake.git
-  rev: v1.3
+- repo: https://github.com/PyCQA/autoflake
+  rev: v2.2.1
   hooks:
     - id: autoflake
       args: ['--in-place', '--expand-star-imports', '--ignore-init-module-imports', '--remove-all-unused-imports']

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers=[
 [tool.poetry.dependencies]
 python = "^3.10"
 urllib3 = ">=2.3.0"
-ray = "2.44.0"
+ray = ">=2.44.0"
 semantic-version = "2.10.0"
 pyarrow = "20.0.0"
 pyfunctional = "1.5.0"

diff --git a/src/fmeval/data_loaders/json_data_loader.py b/src/fmeval/data_loaders/json_data_loader.py
@@ -10,7 +10,6 @@
 
 from ray.data.datasource.file_based_datasource import (
     FileBasedDatasource,
-    _resolve_kwargs,
 )
 
 from fmeval.exceptions import EvalAlgorithmInternalError

diff --git a/src/fmeval/eval_algorithms/qa_accuracy.py b/src/fmeval/eval_algorithms/qa_accuracy.py
@@ -33,7 +33,6 @@
     require,
     create_shared_resource,
     cleanup_shared_resource,
-    assert_condition,
 )
 
 F1_SCORE = "f1_score"

diff --git a/src/fmeval/eval_algorithms/util.py b/src/fmeval/eval_algorithms/util.py
@@ -12,7 +12,6 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 from fmeval.constants import (
     DatasetColumns,
-    EVAL_OUTPUT_RECORDS_BATCH_SIZE,
     MEAN,
     NUM_ROWS_DETERMINISTIC,
     DATASET_COLUMNS,
@@ -23,8 +22,6 @@
     CategoryScore,
     DATASET_CONFIGS,
     EVAL_DATASETS,
-    EvalOutput,
-    get_default_prompt_template,
 )
 from fmeval.exceptions import EvalAlgorithmInternalError
 from fmeval.model_runners.composers.composers import PromptComposer

diff --git a/test/unit/data_loaders/test_json_data_loader.py b/test/unit/data_loaders/test_json_data_loader.py
@@ -8,7 +8,6 @@
 from fmeval.data_loaders.json_data_loader import (
     JsonDataLoader,
     JsonDataLoaderConfig,
-    CustomJSONDatasource,
 )
 from fmeval.data_loaders.util import DataConfig
 from typing import Any, Dict, List, NamedTuple, Optional, Union

diff --git a/test/unit/eval_algorithms/test_classification_accuracy_semantic_robustness.py b/test/unit/eval_algorithms/test_classification_accuracy_semantic_robustness.py
@@ -8,19 +8,14 @@
 
 from fmeval.constants import (
     DatasetColumns,
-    MIME_TYPE_JSON,
     BUTTER_FINGER,
     RANDOM_UPPER_CASE,
     WHITESPACE_ADD_REMOVE,
     MEAN,
 )
 from fmeval.eval_algorithms import (
     EvalScore,
-    EvalOutput,
     CategoryScore,
-    BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES,
-    DEFAULT_PROMPT_TEMPLATE,
-    WOMENS_CLOTHING_ECOMMERCE_REVIEWS,
 )
 from fmeval.eval_algorithms.classification_accuracy_semantic_robustness import (
     ClassificationAccuracySemanticRobustnessConfig,

diff --git a/test/unit/eval_algorithms/test_prompt_stereotyping.py b/test/unit/eval_algorithms/test_prompt_stereotyping.py
@@ -8,16 +8,11 @@
 
 from fmeval.constants import (
     DatasetColumns,
-    MIME_TYPE_JSON,
-    DEFAULT_EVAL_RESULTS_PATH,
 )
 from fmeval.eval_algorithms import (
     EvalOutput,
     CategoryScore,
     EvalScore,
-    EvalAlgorithm,
-    DEFAULT_PROMPT_TEMPLATE,
-    CROWS_PAIRS,
 )
 from fmeval.eval_algorithms.prompt_stereotyping import (
     PromptStereotyping,

diff --git a/test/unit/eval_algorithms/test_qa_accuracy.py b/test/unit/eval_algorithms/test_qa_accuracy.py
@@ -18,11 +18,6 @@
     EvalOutput,
     CategoryScore,
     EvalScore,
-    BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES,
-    TRIVIA_QA,
-    BOOLQ,
-    NATURAL_QUESTIONS,
-    DEFAULT_PROMPT_TEMPLATE,
 )
 from fmeval.eval_algorithms.helper_models.helper_model import BertscoreHelperModel
 from fmeval.eval_algorithms.qa_accuracy import (
@@ -41,8 +36,6 @@
     _split,
     _quasi_exact_match_score,
     SCORE_NAMES,
-    SplitWithDelimiter,
-    BertScore,
 )
 from fmeval.exceptions import EvalAlgorithmClientError
 

diff --git a/test/unit/eval_algorithms/test_qa_toxicity.py b/test/unit/eval_algorithms/test_qa_toxicity.py
@@ -7,17 +7,9 @@
 
 from fmeval.constants import (
     DatasetColumns,
-    MIME_TYPE_JSON,
 )
 from fmeval.eval_algorithms import (
     EvalScore,
-    EvalOutput,
-    CategoryScore,
-    NATURAL_QUESTIONS,
-    BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES,
-    TRIVIA_QA,
-    BOOLQ,
-    DEFAULT_PROMPT_TEMPLATE,
 )
 from fmeval.eval_algorithms.helper_models.helper_model import (
     TOXIGEN_SCORE_NAME,

diff --git a/test/unit/eval_algorithms/test_toxicity.py b/test/unit/eval_algorithms/test_toxicity.py
@@ -10,7 +10,6 @@
 
 from fmeval.constants import (
     DatasetColumns,
-    MIME_TYPE_JSON,
     MEAN,
 )
 from fmeval.data_loaders.data_config import DataConfig

diff --git a/test/unit/eval_algorithms/test_util.py b/test/unit/eval_algorithms/test_util.py
@@ -22,7 +22,6 @@
 from fmeval.eval_algorithms import (
     EvalAlgorithm,
     DATASET_CONFIGS,
-    EVAL_DATASETS,
     BOOLQ,
     TRIVIA_QA,
     NATURAL_QUESTIONS,
@@ -35,7 +34,6 @@
     WOMENS_CLOTHING_ECOMMERCE_REVIEWS,
     REAL_TOXICITY_PROMPTS,
     REAL_TOXICITY_PROMPTS_CHALLENGING,
-    EvalOutput,
     CategoryScore,
 )
 from fmeval.eval_algorithms.eval_algorithm import EvalScore