Source code for eval_framework.metrics.completion.language_checker
from eval_framework.exceptions import LogicError
from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.metrics.llm.graders.language import AVAILABLE_LANGUAGES
from eval_framework.shared.types import Completion
[docs]
class LanguageChecker(BaseMetric[Completion]):
    """Metric that compares a completion's language against its ground truth.

    The expected language is taken from ``response.ground_truth`` and has to
    be a member of ``AVAILABLE_LANGUAGES``.
    """

    NAME = "Language Check"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score whether the completion language equals the ground-truth language.

        Args:
            response: The completion to evaluate.

        Returns:
            A one-element list. The value is ``None`` (with the error attached)
            when ``response.error`` is set; otherwise it is ``1.0`` if
            ``response.get_completion_language()`` equals
            ``response.ground_truth`` and ``0.0`` if not.

        Raises:
            LogicError: If ``response.ground_truth`` is ``None`` or is not
                contained in ``AVAILABLE_LANGUAGES``.
        """
        failure = response.error
        if failure is not None:
            # Failed completions are reported, but carry no score.
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=failure)]

        expected = response.ground_truth
        if expected is None:
            raise LogicError("Language detection needs ground_truth.")
        if expected not in AVAILABLE_LANGUAGES:
            raise LogicError("Checking for unknown or unavailable language.")

        detected = response.get_completion_language()
        is_match = detected == expected
        return [
            MetricResult(
                metric_name=self.NAME,
                value=float(is_match),
                higher_is_better=True,
                error=response.error,
            )
        ]
[docs]
class GermanCompletionChecker(BaseMetric[Completion]):
    """Metric that checks whether the raw completion language is ``"de"``."""

    NAME = "German Completion Check"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score whether the raw completion language is German.

        Args:
            response: The completion to evaluate.

        Returns:
            A one-element list. The value is ``None`` (with the error attached)
            when ``response.error`` is set; otherwise it is ``1.0`` if
            ``response.get_raw_completion_language()`` returns ``"de"`` and
            ``0.0`` if not.
        """
        failure = response.error
        if failure is not None:
            # Failed completions are reported, but carry no score.
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=failure)]

        detected = response.get_raw_completion_language()
        is_german = detected == "de"
        return [
            MetricResult(
                metric_name=self.NAME,
                value=float(is_german),
                higher_is_better=True,
                error=response.error,
            )
        ]
[docs]
class LanguageConsistencyChecker(BaseMetric[Completion]):
    """Metric that checks the completion language against the instruction language."""

    NAME = "Language Consistency"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score whether completion and instruction share the same language.

        Args:
            response: The completion to evaluate.

        Returns:
            An empty list when both ``response.get_completion_language()`` and
            ``response.get_instruction_language()`` return ``""``. Otherwise a
            one-element list whose value is ``None`` (with the error attached)
            when ``response.error`` is set, ``1.0`` when the two languages are
            equal, and ``0.0`` when they differ.
        """
        failure = response.error
        if failure is not None:
            # Failed completions are reported, but carry no score.
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=failure)]

        produced = response.get_completion_language()
        expected = response.get_instruction_language()

        # Both sides are empty: no language information could be determined,
        # so no result is emitted at all.
        if produced == expected and expected == "":
            return []

        is_consistent = produced == expected
        return [
            MetricResult(
                metric_name=self.NAME,
                value=float(is_consistent),
                higher_is_better=True,
                error=response.error,
            )
        ]
[docs]
class LanguageRawConsistencyChecker(BaseMetric[Completion]):
    """Metric that checks the raw completion language against the instruction language."""

    NAME = "Language Consistency Raw"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score whether the raw completion and the instruction share a language.

        Args:
            response: The completion to evaluate.

        Returns:
            An empty list when both ``response.get_raw_completion_language()``
            and ``response.get_instruction_language()`` return ``""``.
            Otherwise a one-element list whose value is ``None`` (with the
            error attached) when ``response.error`` is set, ``1.0`` when the
            two languages are equal, and ``0.0`` when they differ.
        """
        failure = response.error
        if failure is not None:
            # Failed completions are reported, but carry no score.
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=failure)]

        produced = response.get_raw_completion_language()
        expected = response.get_instruction_language()

        # Both sides are empty: no language information could be determined,
        # so no result is emitted at all.
        if produced == expected and expected == "":
            return []

        is_consistent = produced == expected
        outcome = MetricResult(
            metric_name=self.NAME,
            value=float(is_consistent),
            higher_is_better=True,
            error=response.error,
        )
        return [outcome]