Source code for eval_framework.metrics.llm.base
import traceback
from eval_framework.llm.base import BaseLLM
from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import Completion, Error
class BaseLLMJudgeMetric(BaseMetric[Completion]):
    """Base class for metrics that use an LLM as a judge to score completions."""

    def __init__(self, llm_judge: BaseLLM, randomize_order: bool = False) -> None:
        self._llm_judge = llm_judge
        self._randomize_order = randomize_order

    def _create_metric_result(
self,
metric_name: str,
higher_is_better: bool,
value: float | None,
llm_judge_prompt: str | None = None,
llm_judge_response: str | None = None,
code_execution_trace: str | None = None,
        error: Exception | None = None,
) -> MetricResult:
"""Helper method to create MetricResult with consistent structure."""
return MetricResult(
metric_name=metric_name,
value=value,
higher_is_better=higher_is_better,
llm_judge_prompt=llm_judge_prompt,
llm_judge_response=llm_judge_response,
code_execution_trace=code_execution_trace,
            # Wrap the raised exception into the framework's Error type. Note that
            # traceback.format_exc() only captures a traceback while the exception
            # is being handled, so pass `error` from inside an except block.
            error=Error(error_class=error.__class__.__name__, message=str(error), traceback=traceback.format_exc())
            if error
            else None,
)
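
A minimal usage sketch of a concrete judge metric built on this base class. The subclass, its NAME, the calculate signature, the completion.completion attribute, and the generate call on BaseLLM are illustrative assumptions, not part of this module; the real interfaces of BaseMetric, BaseLLM, and Completion may differ.

class ExampleScoreJudge(BaseLLMJudgeMetric):  # hypothetical subclass, for illustration only
    NAME = "example_score_judge"  # hypothetical metric name

    def calculate(self, completion: Completion) -> list[MetricResult]:
        # Assumed Completion attribute; adjust to the real field name.
        prompt = f"Rate this answer between 0 and 1, reply with the number only:\n{completion.completion}"
        try:
            # Assumed BaseLLM method; substitute the judge's real generation call.
            response = self._llm_judge.generate(prompt)
            value = float(response.strip())
        except Exception as exc:
            # Build the error result while the exception is still being handled,
            # so traceback.format_exc() in the helper can capture the traceback.
            return [
                self._create_metric_result(
                    metric_name=self.NAME,
                    higher_is_better=True,
                    value=None,
                    llm_judge_prompt=prompt,
                    error=exc,
                )
            ]
        return [
            self._create_metric_result(
                metric_name=self.NAME,
                higher_is_better=True,
                value=value,
                llm_judge_prompt=prompt,
                llm_judge_response=response,
            )
        ]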