Source code for eval_framework.metrics.loglikelihood.accuracy_loglikelihood

from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import Loglikelihood


class AccuracyLoglikelihood(BaseMetric[Loglikelihood]):
    NAME = "Accuracy Loglikelihood"

    def calculate(self, response: Loglikelihood) -> list[MetricResult]:
        if response.error is not None:
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)]

        ground_truth_list = response.ground_truth_list
        # The predicted completion is the candidate with the highest loglikelihood.
        completion_text = max(response.loglikelihoods, key=response.loglikelihoods.get)  # type: ignore[arg-type]
        return [
            MetricResult(
                metric_name=self.NAME,
                value=float(completion_text in ground_truth_list),
                higher_is_better=True,
                error=response.error,
            )
        ]
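
A minimal usage sketch, assuming a Loglikelihood object can be constructed from just the fields accessed above (loglikelihoods, ground_truth_list, error) and that the metric takes no constructor arguments; the real types may require more:

    # Hypothetical example values; keyword arguments are assumptions based on
    # the attributes read in calculate() and may not match the real constructor.
    response = Loglikelihood(
        loglikelihoods={" Paris": -1.2, " London": -3.5, " Berlin": -4.0},
        ground_truth_list=[" Paris"],
        error=None,
    )
    results = AccuracyLoglikelihood().calculate(response)
    assert results[0].value == 1.0  # " Paris" has the highest loglikelihood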


class AccuracyNormLoglikelihood(BaseMetric[Loglikelihood]):
    NAME = "Accuracy Normalized Loglikelihood"

    def calculate(self, response: Loglikelihood) -> list[MetricResult]:
        if response.error is not None:
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)]

        ground_truth_list = response.ground_truth_list
        # Normalize each loglikelihood by the character length of its completion,
        # so longer completions are not penalized simply for accumulating more
        # (negative) token loglikelihoods.
        output_len_normalized = {}
        for k, v in response.loglikelihoods.items():
            completion_length = len(k)
            if completion_length != 0:
                output_len_normalized[k] = v / completion_length
            else:
                output_len_normalized[k] = v
        model_output_len_normalized = max(output_len_normalized, key=output_len_normalized.get)  # type: ignore
        return [
            MetricResult(
                metric_name=self.NAME,
                value=float(model_output_len_normalized in ground_truth_list),
                higher_is_better=True,
                error=response.error,
            )
        ]
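
A sketch of how the length normalization changes the prediction, under the same assumptions about constructing a Loglikelihood as above: a longer completion with a lower total loglikelihood can still be selected once each score is divided by its character length.

    # Hypothetical values: " no" totals -2.0 (-2.0 / 3 chars ~= -0.67), while
    # " certainly not" totals -4.2 (-4.2 / 14 chars = -0.3), so the longer
    # completion wins after normalization even though its raw total is lower.
    response = Loglikelihood(
        loglikelihoods={" no": -2.0, " certainly not": -4.2},
        ground_truth_list=[" certainly not"],
        error=None,
    )
    results = AccuracyNormLoglikelihood().calculate(response)
    assert results[0].value == 1.0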