# Source code for eval_framework.tasks.benchmarks.naturalqs_open

from typing import Any

from eval_framework.metrics.completion.accuracy_completion import AccuracyCompletion
from eval_framework.metrics.completion.f1 import F1
from eval_framework.metrics.loglikelihood.accuracy_loglikelihood import (
    AccuracyLoglikelihood,
    AccuracyNormLoglikelihood,
)
from eval_framework.metrics.loglikelihood.bits_per_byte import BitsPerByteLoglikelihood
from eval_framework.tasks.base import NO_SUBJECT, BaseTask, Language, ResponseType
from eval_framework.tasks.utils import get_n_letters


class NaturalQsOpen(BaseTask[str]):
    """Open-ended Natural Questions task evaluated on free-form completions.

    Each item is rendered as ``Question: <question>`` with the cue ``Answer:``.
    Every entry of the item's ``answer`` list is treated as a reference answer.
    """

    NAME = "NaturalQsOpen"
    DATASET_PATH = "google-research-datasets/nq_open"
    SAMPLE_SPLIT = "validation"
    FEWSHOT_SPLIT = "train"
    RESPONSE_TYPE = ResponseType.COMPLETION
    METRICS = [AccuracyCompletion, F1]
    SUBJECTS = [NO_SUBJECT]
    PERTURBATION_UNMODIFIABLE_WORDS = ["Question", "Answer"]
    LANGUAGE = Language.ENG

    def __init__(self, num_fewshot: int = 0) -> None:
        """Initialise the task and set its generation limits.

        Args:
            num_fewshot: Number of few-shot examples, forwarded to ``BaseTask``.
        """
        super().__init__(num_fewshot)
        # The stop strings match the prompt's own "Question:" marker, a "Q:"
        # variant, and a blank line.
        # NOTE(review): presumably these cut generation off before the model
        # starts a new question - confirm against BaseTask.
        self.stop_sequences = ["Question:", "Q:", "\n\n"]
        self.max_tokens = 50

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return the question line; a missing ``question`` renders as empty."""
        return f"Question: {item.get('question', '')}"

    def _get_cue_text(self, item: dict[str, Any]) -> str:
        """Return the fixed cue that precedes the answer."""
        return "Answer:"

    def _get_ground_truth(self, item: dict[str, Any]) -> list[str]:
        """Return every reference answer, each prefixed with a single space.

        A missing ``answer`` key yields an empty list.
        """
        return [f" {a}" for a in item.get("answer", [])]

    def _get_fewshot_target_text(self, item: dict[str, Any]) -> str:
        """Return the few-shot target: the cue followed by the first answer.

        Raises:
            ValueError: If the item has no reference answers.
        """
        ground_truth = self._get_ground_truth(item)
        if not ground_truth:
            raise ValueError("Few-shot item has no reference answers to use as target text.")
        # _get_ground_truth returns a list; interpolating the whole list would
        # put its Python repr ("[' a', ' b']") into the prompt, so a single
        # reference answer is used instead.
        return f"{self._get_cue_text(item)}{ground_truth[0]}"
class NaturalQsOpenCloze(BaseTask[str]):
    """Cloze-style Natural Questions task scored via log-likelihoods.

    Every choice text of an item is offered as a candidate completion after
    the ``Answer:`` cue; the gold completion is the text whose label equals
    the item's ``answerKey``.
    """

    NAME = "NaturalQsOpenCloze"
    DATASET_PATH = "allenai/nq-gen2mc"
    # Evaluation samples and few-shot examples are drawn from the same split.
    SAMPLE_SPLIT = "validation"
    FEWSHOT_SPLIT = "validation"
    RESPONSE_TYPE = ResponseType.LOGLIKELIHOODS
    METRICS = [
        AccuracyLoglikelihood,
        AccuracyNormLoglikelihood,
        BitsPerByteLoglikelihood,
    ]
    SUBJECTS = [NO_SUBJECT]
    PERTURBATION_UNMODIFIABLE_WORDS = ["Question", "Answer"]
    LANGUAGE = Language.ENG

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return the question line terminated by a newline."""
        question = item.get("question", "")
        return f"Question: {question}\n"

    def _get_ground_truth(self, item: dict[str, Any]) -> str | None:
        """Return the gold choice text, prefixed with a single space.

        Raises:
            ValueError: If ``answerKey`` is not among the choice labels.
        """
        choice_texts = item.get("choices", {}).get("text", [])
        choice_labels = item.get("choices", {}).get("label", [])
        answer_key = item.get("answerKey", "")
        # Labels and texts are parallel lists; the label position selects the text.
        position = choice_labels.index(answer_key)
        gold_text = choice_texts[position]
        return f" {gold_text}"

    def _get_cue_text(self, item: dict[str, Any]) -> str:
        """Return the fixed cue that precedes the answer."""
        return "Answer:"

    def _get_possible_completions(self, item: dict[str, Any]) -> list[str] | None:
        """Return every choice text as a candidate, each with a leading space."""
        choice_texts = item.get("choices", {}).get("text", [])
        return [f" {candidate}" for candidate in choice_texts]

    def _get_fewshot_target_text(self, item: dict[str, Any]) -> str:
        """Return the few-shot target: the cue followed by the gold completion."""
        gold = self._get_ground_truth(item)
        assert gold is not None
        cue = self._get_cue_text(item)
        return f"{cue}{gold}"
class NaturalQsOpenMC(NaturalQsOpenCloze):
    """Multiple-choice variant of the cloze task.

    The options are listed in the prompt under answer keys, and the candidate
    completions are the keys themselves rather than the choice texts.
    """

    NAME = "NaturalQsOpenMC"

    def __init__(self, num_fewshot: int = 0) -> None:
        """Initialise the task and build its four answer keys.

        Args:
            num_fewshot: Number of few-shot examples, forwarded to the parent.
        """
        super().__init__(num_fewshot)
        # NOTE(review): presumably the first four letters ("A".."D") - confirm
        # against get_n_letters.
        self.keys = get_n_letters(4)

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return the question followed by one `` <key>. <text>`` line per option."""
        question = item.get("question", "")
        choice_texts = item.get("choices", {}).get("text", [])
        # zip stops at the shorter sequence, so options beyond the available
        # keys are not rendered.
        option_lines = [f" {letter}. {choice}" for letter, choice in zip(self.keys, choice_texts)]
        rendered_options = "\n".join(option_lines)
        return f"Question: {question}\n{rendered_options}\n"

    def _get_ground_truth(self, item: dict[str, Any]) -> str | None:
        """Return the gold answer key, prefixed with a single space.

        Raises:
            ValueError: If ``answerKey`` is not one of ``self.keys``.
        """
        answer_key = item.get("answerKey", "")
        position = self.keys.index(answer_key)
        gold_letter = self.keys[position]
        return f" {gold_letter}"

    def _get_possible_completions(self, item: dict[str, Any]) -> list[str] | None:
        """Return every answer key as a candidate, each with a leading space."""
        return [f" {letter}" for letter in self.keys]

    def _get_fewshot_target_text(self, item: dict[str, Any]) -> str:
        """Return the few-shot target: the cue followed by the gold key."""
        gold = self._get_ground_truth(item)
        assert gold is not None
        cue = self._get_cue_text(item)
        return f"{cue}{gold}"
class NaturalQsOpenMC_OLMES(NaturalQsOpenMC):
    """
    NaturalQsOpenMC with OLMES-style prompt: space before each label in the prompt (" A.", " B.", ...).
    """

    NAME = "NaturalQsOpenMC_OLMES"

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return the question followed by space-prefixed `` <key>. <text>`` option lines."""
        # NOTE(review): this renders the same text as the parent's
        # implementation as currently written - confirm whether the two
        # prompts are meant to differ.
        question = item.get("question", "")
        choice_texts = item.get("choices", {}).get("text", [])
        option_lines = []
        for letter, choice in zip(self.keys, choice_texts):
            option_lines.append(f" {letter}. {choice}")
        rendered_options = "\n".join(option_lines)
        return f"Question: {question}\n{rendered_options}\n"