Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ class EventAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] means answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_EVENT_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -241,7 +243,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate financial event analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_EVENT_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ class EventIdentificationGrader(LLMGrader):
>>> print(result.rank) # [2, 1] means answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_EVENT_IDENTIFICATION_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -218,7 +220,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate financial event identification quality by comparing two responses",
model=model,
template=template or DEFAULT_EVENT_IDENTIFICATION_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ class CharacteristicsAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_CHARACTERISTICS_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -236,7 +238,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate industry characteristics analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_CHARACTERISTICS_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/industry_research/risk_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ class RiskAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_RISK_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -228,7 +230,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate financial risk analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_RISK_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ class UnderlyingComparisonGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_UNDERLYING_COMPARISON_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -232,7 +234,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate underlying comparison analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_UNDERLYING_COMPARISON_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class ConceptExplanationGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_CONCEPT_EXPLANATION_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -207,7 +209,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate macroeconomic concept explanation quality by comparing two responses",
model=model,
template=template or DEFAULT_CONCEPT_EXPLANATION_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/macro_analysis/macro_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ class MacroAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_MACRO_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -234,7 +236,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate macroeconomic analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_MACRO_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ class FundamentalAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_FUNDAMENTAL_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -232,7 +234,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate fundamental analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_FUNDAMENTAL_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/stock_analysis/overall_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ class OverallLogicGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_OVERALL_LOGIC_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -224,7 +226,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate overall logic and structure quality by comparing two responses",
model=model,
template=template or DEFAULT_OVERALL_LOGIC_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ class StockRiskAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_STOCK_RISK_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -231,7 +233,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate stock risk analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_STOCK_RISK_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ class ValuationAnalysisGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_VALUATION_ANALYSIS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -222,7 +224,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate valuation analysis quality by comparing two responses",
model=model,
template=template or DEFAULT_VALUATION_ANALYSIS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/stock_search/search_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class SearchIntegrityGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_SEARCH_INTEGRITY_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -207,7 +209,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate stock search integrity and completeness by comparing two responses",
model=model,
template=template or DEFAULT_SEARCH_INTEGRITY_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/stock_search/search_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class SearchRelevanceGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_SEARCH_RELEVANCE_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -207,7 +209,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate stock search relevance by comparing two responses",
model=model,
template=template or DEFAULT_SEARCH_RELEVANCE_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
4 changes: 3 additions & 1 deletion cookbooks/finance_grader/stock_search/search_timeliness.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class SearchTimelinessGrader(LLMGrader):
>>> print(result.rank) # [2, 1] if answer_2 is better
"""

DEFAULT_TEMPLATE = DEFAULT_SEARCH_TIMELINESS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -207,7 +209,7 @@ def __init__(
mode=GraderMode.LISTWISE,
description="Evaluate stock search timeliness by comparing two responses",
model=model,
template=template or DEFAULT_SEARCH_TIMELINESS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
2 changes: 1 addition & 1 deletion openjudge/graders/agent/action/action_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class ActionAlignmentGrader(LLMGrader):
def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand Down
6 changes: 4 additions & 2 deletions openjudge/graders/agent/memory/memory_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,12 @@ class MemoryAccuracyGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_MEMORY_ACCURACY_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_MEMORY_ACCURACY_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand All @@ -192,7 +194,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate memory accuracy",
model=model,
template=template or DEFAULT_MEMORY_ACCURACY_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
6 changes: 4 additions & 2 deletions openjudge/graders/agent/memory/memory_detail_preservation.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,12 @@ class MemoryDetailPreservationGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand All @@ -192,7 +194,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate memory detail preservation",
model=model,
template=template or DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ class MemoryRetrievalEffectivenessGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
Expand All @@ -193,7 +195,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate memory retrieval effectiveness",
model=model,
template=template or DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
6 changes: 4 additions & 2 deletions openjudge/graders/agent/plan/plan_feasibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,12 @@ class PlanFeasibilityGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_PLAN_FEASIBILITY_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_PLAN_FEASIBILITY_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand All @@ -195,7 +197,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate plan feasibility",
model=model,
template=template or DEFAULT_PLAN_FEASIBILITY_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
6 changes: 4 additions & 2 deletions openjudge/graders/agent/reflection/reflection_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,12 @@ class ReflectionAccuracyGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_REFLECTION_ACCURACY_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_REFLECTION_ACCURACY_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand All @@ -191,7 +193,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate reflection accuracy",
model=model,
template=template or DEFAULT_REFLECTION_ACCURACY_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,12 @@ class ReflectionOutcomeUnderstandingGrader(LLMGrader):
>>> print(f"Score: {result.score}") # Expected: 1.0
"""

DEFAULT_TEMPLATE = DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE

def __init__(
self,
model: BaseChatModel | dict,
template: Optional[PromptTemplate] = DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE,
template: Optional[PromptTemplate] = None,
language: LanguageEnum = LanguageEnum.EN,
strategy: BaseEvaluationStrategy | None = None,
):
Expand All @@ -326,7 +328,7 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate reflection outcome understanding",
model=model,
template=template or DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE,
template=template or self.DEFAULT_TEMPLATE,
language=language,
strategy=strategy,
)
Expand Down
Loading