PR comments

cheungrowan · cheungrowan · commit 9edd56b51f4e · 2024-01-29T10:51:20.000-05:00
diff --git a/arthur_bench/scoring/qa_quality.py b/arthur_bench/scoring/qa_quality.py
@@ -84,6 +84,11 @@ def validate_batch(
                 "context is required for this scoring method. Please provide a "
                 "dataframe column or a list of your context strings in the Test Suite."
             )
+
+        if reference_batch is not None:
+            raise UserValueError(
+                "using reference is not currently supported for qa correctness"
+            )
         return input_text_batch, context_batch
 
     async def arun_batch(
diff --git a/arthur_bench/scoring/summary_quality.py b/arthur_bench/scoring/summary_quality.py
@@ -187,24 +187,15 @@ def run(
         )
 
     def _parse_response(self, response: Dict[str, Any]) -> ScoreResult:
-        score = None
-        if "text" in response:
-            llmchoice = response["text"][:3]
-            score = LLM_CHOICE_TO_FLOAT.get(llmchoice)
-            if score is not None:
-                result = ScoreResult(
-                    score=score,
-                    category=LLM_CHOICE_TO_CATEGORIES.get(
-                        llmchoice, LLM_CHOICE_TO_CATEGORIES["default"]
-                    ),
-                )
-
-        # return -1.0 if the LLMChain returns an invalid result
-        if score is None:
-            result = ScoreResult(
-                score=-1.0, category=LLM_CHOICE_TO_CATEGORIES["default"]
+        llmchoice = response["text"][:3] if "text" in response else None
+
+        if llmchoice in LLM_CHOICE_TO_FLOAT:
+            return ScoreResult(
+                score=LLM_CHOICE_TO_FLOAT[llmchoice],
+                category=LLM_CHOICE_TO_CATEGORIES[llmchoice],
             )
-        return result
+        else:
+            return ScoreResult(score=-1.0, category=LLM_CHOICE_TO_CATEGORIES["default"])
 
     @staticmethod
     def validate_batch(