Source code for eval_framework.metrics.loglikelihood.base
import math
from eval_framework.metrics.base import BaseMetric
from eval_framework.shared.types import Loglikelihood
class BaseLoglikelihoodMetric(BaseMetric[Loglikelihood]):
"""Base class for metrics that operate on loglikelihood responses."""
    def __init__(
        self,
        *,
        len_normalised: bool = True,
    ) -> None:
        """If ``len_normalised`` is True, loglikelihoods are divided by completion length before comparison."""
        self.len_normalised = len_normalised
def _normalise_text(self, text: str) -> str:
return text.strip().lower()
def _length_normalise_loglikelihoods(self, loglikelihoods: dict) -> dict:
"""Return a dict of length-normalised loglikelihoods."""
output = {}
for k, v in loglikelihoods.items():
length = len(k)
output[k] = v / length if length > 0 else v
return output
    def _compute_probabilities(self, loglikelihoods: dict[str, float]) -> tuple[dict[str, float], dict[str, float]]:
        """Return the (optionally length-normalised) loglikelihoods and their softmax probabilities."""
        if self.len_normalised:
            loglikelihoods = self._length_normalise_loglikelihoods(loglikelihoods)
        return loglikelihoods, self._softmax(loglikelihoods)
    def _gather_ground_truths(self, response: Loglikelihood) -> set[str]:
        """Extract and normalise the ground-truth completions from a Loglikelihood response."""
        ground_truths = response.ground_truth if isinstance(response.ground_truth, list) else [response.ground_truth]
        return {self._normalise_text(gt) for gt in ground_truths}
    def _softmax(self, log_probs: dict[str, float]) -> dict[str, float]:
        """Convert loglikelihoods to probabilities with a numerically stable softmax."""
        vals = list(log_probs.values())
        if not vals:  # no valid entries
            return {}
        # Subtract the maximum before exponentiating so exp() cannot overflow.
        m = max(vals)
        exp_vals = [math.exp(x - m) for x in vals]
        total = sum(exp_vals)
        return {k: ev / total for k, ev in zip(log_probs.keys(), exp_vals)}
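# --- Illustrative usage (not part of the module) ---------------------------
# A minimal sketch of how a concrete subclass might combine the helpers
# above. The class name, the ``calculate`` method, and the
# ``response.loglikelihoods`` attribute (assumed to map completion text to
# its summed loglikelihood) are assumptions for this example, not
# definitions taken from ``eval_framework``.
#
# class LoglikelihoodAccuracyExample(BaseLoglikelihoodMetric):
#     def calculate(self, response: Loglikelihood) -> float:
#         # Length-normalise (if configured) and convert to probabilities.
#         _, probabilities = self._compute_probabilities(response.loglikelihoods)
#         # Pick the completion with the highest softmax probability.
#         prediction = max(probabilities, key=probabilities.get)
#         # Score 1.0 if it matches any normalised ground truth, else 0.0.
#         ground_truths = self._gather_ground_truths(response)
#         return 1.0 if self._normalise_text(prediction) in ground_truths else 0.0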