Source code for eval_framework.metrics.completion.format_checker

import json
import re

from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import Completion


class CheckJsonFormat(BaseMetric[Completion]):
    """Metric that scores whether a completion parses as JSON.

    A surrounding Markdown code fence (with or without a ``json`` language
    tag) is removed before parsing, so fenced JSON is still accepted.
    """

    NAME = "JSON Format"

    def _preprocess(self, completion: str) -> str:
        """Return ``completion`` with outer whitespace and code fences removed."""
        text = completion.strip()
        # The tagged fences are listed before the bare fence so that the
        # language tag is removed together with the backticks.
        for fence in ("```json", "```Json", "```JSON", "```"):
            text = text.removeprefix(fence)
        return text.removesuffix("```").strip()

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score ``response`` as 1.0 if its completion is parseable JSON, else 0.0.

        Returns a single-element list. If ``response`` already carries an
        error, no parsing is attempted and the result has ``value=None`` with
        that error attached.
        """
        if response.error is not None:
            return [
                MetricResult(
                    metric_name=self.NAME,
                    value=None,
                    higher_is_better=True,
                    error=response.error,
                )
            ]

        candidate = self._preprocess(response.completion)
        try:
            json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            score = 0.0
        else:
            score = 1.0

        return [
            MetricResult(
                metric_name=self.NAME,
                value=score,
                higher_is_better=True,
                error=response.error,
            )
        ]
class CheckPostScriptFormat(BaseMetric[Completion]):
    """Metric that scores whether a completion contains a postscript marker.

    This is a deliberately shallow check modelled on the original IFEval
    implementation: it only looks for a ``P.S.`` / ``P.P.S.`` marker, or a
    variant thereof such as ``p. s.``, anywhere in the text. It does not
    parse the text or verify that a postscript actually follows the marker.
    """

    NAME = "Postscript Format"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score ``response`` as 1.0 if a postscript marker is present, else 0.0.

        Returns a single-element list. If ``response`` already carries an
        error, no matching is attempted and the result has ``value=None`` with
        that error attached.
        """
        if response.error is not None:
            return [
                MetricResult(
                    metric_name=self.NAME,
                    value=None,
                    higher_is_better=True,
                    error=response.error,
                )
            ]

        # Case-insensitive, with optional whitespace between the letters, so
        # that variants such as "p.s." and "P. S." are accepted. Any
        # "P.P.S." necessarily contains "P.S.", so one pattern covers both.
        marker = re.search(r"p\.\s?s\.", response.completion, flags=re.IGNORECASE)

        return [
            MetricResult(
                metric_name=self.NAME,
                value=1.0 if marker is not None else 0.0,
                higher_is_better=True,
                error=response.error,
            )
        ]