Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions ais_bench/benchmark/configs/summarizers/groups/mmlu_pro.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,22 @@
# Summary groups for MMLU-Pro. The 'mmlu_pro' group lists one subset per
# entry in `categories`; each subset name is 'mmlu_pro_' followed by the
# category name with spaces replaced by underscores.
# NOTE(review): `categories` is defined earlier in this file (outside this
# view) — presumably the list of MMLU-Pro category names; confirm.
mmlu_pro_summary_groups = [
    dict(
        name='mmlu_pro',
        subsets=['mmlu_pro_' + category.replace(' ', '_') for category in categories],
    ),
]

# Weighted MMLU-Pro summary group: the subsets are derived from `categories`
# with the 'mmlu_pro_' prefix and spaces turned into underscores, and each
# subset name is mapped to an integer weight. The weights below sum to 12032.
# NOTE(review): the weights are presumably the per-category question counts
# of the MMLU-Pro test split — confirm against the dataset.
_mmlu_pro_all = ['mmlu_pro_' + category.replace(' ', '_') for category in categories]
_mmlu_pro_weights = {
    'mmlu_pro_math': 1351,
    'mmlu_pro_physics': 1299,
    'mmlu_pro_chemistry': 1132,
    'mmlu_pro_law': 1101,
    'mmlu_pro_engineering': 969,
    'mmlu_pro_other': 924,
    'mmlu_pro_economics': 844,
    'mmlu_pro_health': 818,
    'mmlu_pro_psychology': 798,
    'mmlu_pro_business': 789,
    'mmlu_pro_biology': 717,
    'mmlu_pro_philosophy': 499,
    'mmlu_pro_computer_science': 410,
    'mmlu_pro_history': 381,
}
# Register the weighted group next to the unweighted 'mmlu_pro' group.
mmlu_pro_summary_groups.append(
    dict(
        name='mmlu_pro-weighted',
        subsets=_mmlu_pro_all,
        weights=_mmlu_pro_weights,
    )
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from aiohttp import ClientSession
from multiprocessing import BoundedSemaphore

from bfcl_eval.utils import make_json_serializable

from ais_bench.benchmark.openicl.icl_retriever import BaseRetriever
from ais_bench.benchmark.registry import MODELS
from ais_bench.benchmark.utils.prompt import PromptList
Expand Down Expand Up @@ -600,6 +602,7 @@ async def _inference_multi_turn(self, data: dict, finial_output: FunctionCallOut
await self.status_counter.case_finish()
if all_model_response:
finial_output.tool_calls = all_model_response
finial_output.inference_log = make_json_serializable(finial_output.inference_log)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There's a typo in the variable name `finial_output`; it should be `final_output`. The typo appears throughout the file, including in function signatures. Correcting it would improve readability and maintainability.

await self.output_handler.report_cache_info(
index, prompt_list, finial_output, data_abbr
)
Expand Down
Loading