|
12 | 12 | from ragas.evaluation import EvaluationDataset, EvaluationResult, RunConfig, evaluate |
13 | 13 | from ragas.metrics import Metric |
14 | 14 | from ragas.metrics._domain_specific_rubrics import ( # the rubrics we need to instantiate live in a private module
15 | | - DEFAULT_WITH_REFERENCE_RUBRICS, |
16 | 15 | RubricsScore, |
| 16 | + SingleTurnPrompt, |
17 | 17 | ) |
18 | 18 |
|
19 | 19 | # Local |
|
22 | 22 |
|
23 | 23 | logger = setup_logger(__name__) |
24 | 24 |
|
| 25 | +OLD_DEFAULT_WITH_REFERENCE_RUBRICS = { |
| 26 | + "score1_description": "The response is incorrect, irrelevant, or does not align with the ground truth.", |
| 27 | + "score2_description": "The response partially matches the ground truth but includes significant errors, omissions, or irrelevant information.", |
| 28 | + "score3_description": "The response generally aligns with the ground truth but may lack detail, clarity, or have minor inaccuracies.", |
| 29 | + "score4_description": "The response is mostly accurate and aligns well with the ground truth, with only minor issues or missing details.", |
| 30 | + "score5_description": "The response is fully accurate, aligns completely with the ground truth, and is clear and detailed.", |
| 31 | +} |
| 32 | + |
25 | 33 |
|
26 | 34 | class Sample(TypedDict): |
27 | 35 | """ |
@@ -256,9 +264,8 @@ def _generate_answers_from_model( |
256 | 264 |
|
257 | 265 | @staticmethod |
258 | 266 | def _get_metrics() -> List[Metric]: |
259 | | - # default set of metrics |
260 | 267 | return [ |
261 | 268 | RubricsScore( |
262 | | - rubrics=DEFAULT_WITH_REFERENCE_RUBRICS, |
| 269 | + rubrics=OLD_DEFAULT_WITH_REFERENCE_RUBRICS, |
263 | 270 | ) |
264 | 271 | ] |
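
For context, the hunks above replace the upstream DEFAULT_WITH_REFERENCE_RUBRICS import with a locally pinned copy, OLD_DEFAULT_WITH_REFERENCE_RUBRICS, and pass it to RubricsScore. The sketch below is not part of this commit: it shows one way the pinned rubrics could be wired into a ragas evaluation run. The judge model, its LangChain wrapper, and the sample row are illustrative assumptions and may differ from what this repository actually does.

# Minimal usage sketch (assumption, not part of this commit): score a
# single-turn sample against a reference using the pinned rubrics.
from langchain_openai import ChatOpenAI
from ragas.evaluation import EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics._domain_specific_rubrics import RubricsScore

OLD_DEFAULT_WITH_REFERENCE_RUBRICS = {
    "score1_description": "The response is incorrect, irrelevant, or does not align with the ground truth.",
    "score2_description": "The response partially matches the ground truth but includes significant errors, omissions, or irrelevant information.",
    "score3_description": "The response generally aligns with the ground truth but may lack detail, clarity, or have minor inaccuracies.",
    "score4_description": "The response is mostly accurate and aligns well with the ground truth, with only minor issues or missing details.",
    "score5_description": "The response is fully accurate, aligns completely with the ground truth, and is clear and detailed.",
}

# user_input / response / reference are the single-turn field names that
# ragas' EvaluationDataset expects for reference-based scoring.
dataset = EvaluationDataset.from_list(
    [
        {
            "user_input": "What is the capital of France?",
            "response": "Paris is the capital of France.",
            "reference": "The capital of France is Paris.",
        }
    ]
)

# Placeholder judge model; any LangChain-compatible chat model wrapped for
# ragas would work here.
judge_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))

result = evaluate(
    dataset=dataset,
    metrics=[RubricsScore(rubrics=OLD_DEFAULT_WITH_REFERENCE_RUBRICS)],
    llm=judge_llm,
)
print(result.to_pandas())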