From ca7286993da4397d40a75f05249f6d37ed21a94b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:21:58 +0000
Subject: [PATCH 1/5] Initial plan

From f5be456b055e98de84b4202b850959a6169dea4e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:31:55 +0000
Subject: [PATCH 2/5] Add is_reasoning_model parameter support to QAEvaluator

Co-authored-by: nagkumar91 <4727422+nagkumar91@users.noreply.github.com>
---
 .../ai/evaluation/_evaluators/_qa/_qa.py       | 14 +++++++++-----
 .../unittests/test_built_in_evaluator.py       | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
index df095f67ba97..f582266b1cfb 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
@@ -35,6 +35,9 @@ class QAEvaluator(MultiEvaluatorBase[Union[str, float]]):
     :type similarity_threshold: int
     :param f1_score_threshold: The threshold for F1 score evaluation. Default is 0.5.
     :type f1_score_threshold: float
+    :keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
+        This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
+    :paramtype is_reasoning_model: bool
     :return: A callable class that evaluates and generates metrics for "question-answering" scenario.
     :param kwargs: Additional arguments to pass to the evaluator.
     :type kwargs: Any
@@ -87,6 +90,7 @@ def __init__(
         fluency_threshold: int = 3,
         similarity_threshold: int = 3,
         f1_score_threshold: float = 0.5,
+        is_reasoning_model: bool = False,
         **kwargs,
     ):
         # Type checking
@@ -102,11 +106,11 @@ def __init__(
             raise TypeError(f"{name} must be an int or float, got {type(value)}")
 
         evaluators = [
-            GroundednessEvaluator(model_config, threshold=groundedness_threshold),
-            RelevanceEvaluator(model_config, threshold=relevance_threshold),
-            CoherenceEvaluator(model_config, threshold=coherence_threshold),
-            FluencyEvaluator(model_config, threshold=fluency_threshold),
-            SimilarityEvaluator(model_config, threshold=similarity_threshold),
+            GroundednessEvaluator(model_config, threshold=groundedness_threshold, is_reasoning_model=is_reasoning_model),
+            RelevanceEvaluator(model_config, threshold=relevance_threshold, is_reasoning_model=is_reasoning_model),
+            CoherenceEvaluator(model_config, threshold=coherence_threshold, is_reasoning_model=is_reasoning_model),
+            FluencyEvaluator(model_config, threshold=fluency_threshold, is_reasoning_model=is_reasoning_model),
+            SimilarityEvaluator(model_config, threshold=similarity_threshold, is_reasoning_model=is_reasoning_model),
             F1ScoreEvaluator(threshold=f1_score_threshold),
         ]
         super().__init__(evaluators=evaluators, **kwargs)
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
index 9bfbc85721eb..bb0d276b0b30 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
@@ -9,6 +9,7 @@
     RetrievalEvaluator,
     RelevanceEvaluator,
     GroundednessEvaluator,
+    QAEvaluator,
 )
 
 
@@ -243,3 +244,21 @@ def test_groundedness_evaluator_missing_required_inputs(self, mock_model_config)
             "Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query' and 'response' are required."
             in exc_info.value.args[0]
         )
+
+    def test_qa_evaluator_is_reasoning_model_default(self, mock_model_config):
+        """Test QAEvaluator initializes with is_reasoning_model defaulting to False"""
+        qa_eval = QAEvaluator(model_config=mock_model_config)
+
+        # Check that all model-based evaluators have is_reasoning_model set to False
+        for evaluator in qa_eval._evaluators:
+            if hasattr(evaluator, '_is_reasoning_model'):
+                assert evaluator._is_reasoning_model is False
+
+    def test_qa_evaluator_is_reasoning_model_true(self, mock_model_config):
+        """Test QAEvaluator properly passes is_reasoning_model=True to sub-evaluators"""
+        qa_eval = QAEvaluator(model_config=mock_model_config, is_reasoning_model=True)
+
+        # Check that all model-based evaluators have is_reasoning_model set to True
+        for evaluator in qa_eval._evaluators:
+            if hasattr(evaluator, '_is_reasoning_model'):
+                assert evaluator._is_reasoning_model is True

From 89aa78575b7a161cbbc167b38429e6314f06a7ff Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 9 Dec 2025 15:24:08 -0800
Subject: [PATCH 3/5] Update
 sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .../tests/unittests/test_built_in_evaluator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
index bb0d276b0b30..72a7908754ad 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
@@ -248,7 +248,6 @@ def test_groundedness_evaluator_missing_required_inputs(self, mock_model_config)
     def test_qa_evaluator_is_reasoning_model_default(self, mock_model_config):
         """Test QAEvaluator initializes with is_reasoning_model defaulting to False"""
         qa_eval = QAEvaluator(model_config=mock_model_config)
-
         # Check that all model-based evaluators have is_reasoning_model set to False
         for evaluator in qa_eval._evaluators:
             if hasattr(evaluator, '_is_reasoning_model'):

From fb69e37725626f01f7e9dc6f19ce9711e8a5a61f Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 9 Dec 2025 15:24:18 -0800
Subject: [PATCH 4/5] Update
 sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .../tests/unittests/test_built_in_evaluator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
index 72a7908754ad..4af60eefbca2 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py
@@ -256,7 +256,6 @@ def test_qa_evaluator_is_reasoning_model_default(self, mock_model_config):
     def test_qa_evaluator_is_reasoning_model_true(self, mock_model_config):
         """Test QAEvaluator properly passes is_reasoning_model=True to sub-evaluators"""
         qa_eval = QAEvaluator(model_config=mock_model_config, is_reasoning_model=True)
-
         # Check that all model-based evaluators have is_reasoning_model set to True
         for evaluator in qa_eval._evaluators:
             if hasattr(evaluator, '_is_reasoning_model'):

From 363bec4db9e63bce4a74fb4c635c11fd4e3f1419 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 9 Dec 2025 23:30:32 +0000
Subject: [PATCH 5/5] Apply black formatting to _qa.py evaluators list

Co-authored-by: nagkumar91 <4727422+nagkumar91@users.noreply.github.com>
---
 .../ai/evaluation/_evaluators/_qa/_qa.py | 30 +++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
index f582266b1cfb..c300552a32bc 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py
@@ -106,11 +106,31 @@ def __init__(
             raise TypeError(f"{name} must be an int or float, got {type(value)}")
 
         evaluators = [
-            GroundednessEvaluator(model_config, threshold=groundedness_threshold, is_reasoning_model=is_reasoning_model),
-            RelevanceEvaluator(model_config, threshold=relevance_threshold, is_reasoning_model=is_reasoning_model),
-            CoherenceEvaluator(model_config, threshold=coherence_threshold, is_reasoning_model=is_reasoning_model),
-            FluencyEvaluator(model_config, threshold=fluency_threshold, is_reasoning_model=is_reasoning_model),
-            SimilarityEvaluator(model_config, threshold=similarity_threshold, is_reasoning_model=is_reasoning_model),
+            GroundednessEvaluator(
+                model_config,
+                threshold=groundedness_threshold,
+                is_reasoning_model=is_reasoning_model,
+            ),
+            RelevanceEvaluator(
+                model_config,
+                threshold=relevance_threshold,
+                is_reasoning_model=is_reasoning_model,
+            ),
+            CoherenceEvaluator(
+                model_config,
+                threshold=coherence_threshold,
+                is_reasoning_model=is_reasoning_model,
+            ),
+            FluencyEvaluator(
+                model_config,
+                threshold=fluency_threshold,
+                is_reasoning_model=is_reasoning_model,
+            ),
+            SimilarityEvaluator(
+                model_config,
+                threshold=similarity_threshold,
+                is_reasoning_model=is_reasoning_model,
+            ),
             F1ScoreEvaluator(threshold=f1_score_threshold),
         ]
         super().__init__(evaluators=evaluators, **kwargs)
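
Usage note (not part of the patches above): a minimal sketch of how the new flag would be exercised end to end. The endpoint, API key, and deployment values are placeholders, and the query/response/context/ground_truth fields are QAEvaluator's standard inputs; the behavior attributed to is_reasoning_model below is taken from the docstring added in patch 2.

from azure.ai.evaluation import AzureOpenAIModelConfiguration, QAEvaluator

# Placeholder configuration -- substitute your own Azure OpenAI resource values.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<your-api-key>",
    azure_deployment="<your-reasoning-model-deployment>",  # e.g. an o1/o3 deployment
)

# Per the patch, is_reasoning_model=True is forwarded to every prompt-based
# sub-evaluator (groundedness, relevance, coherence, fluency, similarity),
# adjusting parameters like max_completion_tokens and dropping unsupported
# ones; the prompt-free F1ScoreEvaluator is unaffected.
qa_eval = QAEvaluator(model_config=model_config, is_reasoning_model=True)

result = qa_eval(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
    context="France's capital city is Paris.",
    ground_truth="Paris",
)
print(result)

This mirrors what the new unit tests assert: the flag set on QAEvaluator propagates to each sub-evaluator's _is_reasoning_model attribute rather than being configured per evaluator.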