Source code for eval_framework.metrics.completion.cwe_accuracy

import re

from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import Completion, Error



[docs]
class CWEAccuracy(BaseMetric[Completion]):
    """Metric for Common Word Extraction (CWE) tasks.

    A completion scores 1.0 when every non-``None`` ground-truth word occurs
    in the model's completion as a whole word (case-insensitively), and 0.0
    otherwise. Higher is better.
    """

    # Identifier passed as ``metric_name`` in every MetricResult this metric emits.
    NAME = "CWEAccuracy"


[docs]
    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score a single completion for the Common Word Extraction task.

        Args:
            response: The completion to score. Its ``error``,
                ``ground_truth_list`` and ``completion`` attributes are read.

        Returns:
            A one-element list holding the result for this metric:

            * ``value=None`` with the upstream error attached when
              ``response.error`` is set;
            * ``value=0.0`` when no non-``None`` ground truth is available;
            * ``value=1.0`` when every ground-truth word is found in the
              completion as a whole word, ``0.0`` otherwise;
            * ``value=None`` with a newly built ``Error`` when the comparison
              itself raises.
        """
        if response.error is not None:
            # The completion already failed upstream; propagate that error unscored.
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)]

        ground_truths = [gt for gt in response.ground_truth_list if gt is not None]
        if not ground_truths:
            # Nothing to compare against: counted as a miss rather than an error.
            return [MetricResult(metric_name=self.NAME, value=0.0, higher_is_better=True, error=response.error)]

        try:
            # Check if all words in the correct answer are present in the model's answer
            is_correct = self._is_answer_correct(ground_truths, response.completion)
        except Exception as e:
            # Metric-level boundary: one malformed sample must not abort the whole
            # evaluation, so the failure is reported on the result instead of raised.
            # Local import so this method does not depend on the module's import block.
            import traceback

            error = Error(
                error_class=e.__class__.__name__,
                message=str(e),
                # Keep the stack trace so the failure can be diagnosed afterwards.
                traceback=traceback.format_exc(),
            )
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=error)]

        return [
            MetricResult(
                metric_name=self.NAME, value=1.0 if is_correct else 0.0, higher_is_better=True, error=response.error
            )
        ]


    def _is_answer_correct(self, correct_answer: list[str], model_answer: str) -> bool:
        """Check if all words in correct_answer are present in model_answer as whole words"""
        model_answer = model_answer.strip().lower()
        correct_answer = [correct.strip().lower() for correct in correct_answer]

        # For each word in the correct answer, check if it exists as a whole word in the model answer
        for word in correct_answer:
            # Create a regex pattern that matches the word as a whole word
            # \b represents a word boundary
            pattern = r"\b" + re.escape(word) + r"\b"
            if not re.search(pattern, model_answer):
                return False

        return True