Source code for eval_framework.metrics.completion.exponential_similarity

import math

from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import Completion, Error


class ExponentialSimilarity(BaseMetric[Completion]):
    """Metric scoring a numeric completion against its ground truths.

    The completion and every ground truth are parsed with ``float()`` and
    compared via ``calculate_exponential_similarity``; the best score over all
    ground truths is reported.
    """

    NAME = "ExponentialSimilarity"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Compute the exponential similarity for a single completion.

        Args:
            response: The completion to score. Its ``error``,
                ``ground_truth_list`` and ``completion`` attributes are read.

        Returns:
            A one-element list holding the result:

            * ``value=None`` carrying ``response.error`` when the response
              already has an error;
            * ``value=0.0`` when there is no non-``None`` ground truth;
            * the maximum similarity over all ground truths that can be
              parsed as floats and yield a well-defined (non-NaN) score;
            * ``value=None`` with a ``ValueError``-style ``Error`` when no
              ground truth / completion pair produced a usable score;
            * ``value=None`` with an ``Error`` describing any unexpected
              exception raised while scoring.
        """
        if response.error is not None:
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)]

        ground_truths = [gt for gt in response.ground_truth_list if gt is not None]
        if not ground_truths:
            return [MetricResult(metric_name=self.NAME, value=0.0, higher_is_better=True, error=response.error)]

        try:
            similarities: list[float] = []

            # The completion is identical for every ground truth, so parse it
            # once instead of once per loop iteration.
            try:
                completion_float = float(response.completion)
            except (ValueError, TypeError):
                completion_float = None

            if completion_float is not None:
                for gt in ground_truths:
                    try:
                        gt_float = float(gt)
                    except (ValueError, TypeError):
                        # Skip this ground truth if conversion fails
                        continue

                    score = calculate_exponential_similarity(gt_float, completion_float)
                    # float() accepts "nan"/"inf", which can produce a NaN
                    # score; max() over a list containing NaN is
                    # order-dependent, so such scores are treated as unusable.
                    if not math.isnan(score):
                        similarities.append(score)

            # If we have any valid similarities, return the max
            if similarities:
                return [
                    MetricResult(
                        metric_name=self.NAME, value=max(similarities), higher_is_better=True, error=response.error
                    )
                ]

            # Nothing could be scored: report it as a conversion error
            error = Error(
                error_class="ValueError",
                message="Could not convert ground truth or completion to float",
                traceback="",
            )
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=error)]
        except Exception as e:
            # Boundary handler: a metric reports unexpected failures as an
            # Error result instead of raising.
            error = Error(error_class=e.__class__.__name__, message=str(e), traceback="")
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=error)]
def calculate_exponential_similarity(p_true: float, p_pred: float, d: float = 2, c: float = 10) -> float:
    """Compute the exponential similarity (SpaceDigest version) between two percentages.

    Both values are divided by 100 before comparison and the score is
    ``d ** (-c * abs(p_true / 100 - p_pred / 100))``.

    Parameters:
    - p_true (float): The gold/reference percentage.
    - p_pred (float): The predicted scalar.
    - d (float): Base of the exponent. Default is 2.
    - c (float): Coefficient in exponent. Default is 10.

    Returns:
    - float: Similarity score. With the defaults (and more generally for
      ``d > 1`` and ``c > 0``) it is 1.0 when the two values are equal and
      decays towards 0 as they move apart.
    """
    # Convert from percentage points to fractions before taking the distance.
    distance = abs(p_true / 100 - p_pred / 100)
    return d ** (-c * distance)