Source code for eval_framework.result_processors.base

from abc import ABC, abstractmethod
from pathlib import Path

from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict

from eval_framework.shared.types import Completion, Error, Loglikelihood
from eval_framework.tasks.eval_config import EvalConfig

MAIN = "eval_framework_results"

load_dotenv()


[docs] class Result(BaseModel): model_config = ConfigDict(extra="forbid") id: int subject: str num_fewshot: int llm_name: str task_name: str metric_class_name: str metric_name: str key: str | None value: float | None higher_is_better: bool prompt: str response: str llm_judge_prompt: str | None = None llm_judge_response: str | None = None code_execution_trace: str | None = None error: Error | None = None
[docs] class ResultProcessor(ABC):
[docs] @abstractmethod def save_metadata(self, metadata: dict) -> None: """Save metadata.""" pass
[docs] @abstractmethod def load_metadata(self) -> dict: """Load metadata.""" pass
[docs] @abstractmethod def save_responses(self, responses: list[Completion | Loglikelihood]) -> None: """Save a list of response objects (overwrite a file).""" pass
[docs] @abstractmethod def save_response(self, response: Completion | Loglikelihood) -> None: """Save a single response object (append into a file).""" pass
[docs] @abstractmethod def load_responses(self) -> list[Completion | Loglikelihood]: """Load a list of response objects.""" pass
[docs] @abstractmethod def save_metrics_results(self, results: list[Result]) -> None: """Save the results of the metrics (overwrite a file).""" pass
[docs] @abstractmethod def save_metrics_result(self, result: Result) -> None: """Save a single metric result (append into a file).""" pass
[docs] @abstractmethod def save_aggregated_results(self, result: dict[str, float | None]) -> None: """Save the aggregated results.""" pass
[docs] @abstractmethod def load_metrics_results(self) -> list[Result]: """Load the aggregated results.""" pass
[docs] class ResultsUploader(ABC):
[docs] @abstractmethod def upload(self, llm_name: str, config: EvalConfig, output_dir: Path) -> bool: """Upload relevant parts from `output_dir` to the desired destination. Returns True if upload was successful, False otherwise. """ pass