Source code for eval_framework.tasks.benchmarks.winox

import os
from pathlib import Path
from typing import Any

from datasets import DownloadConfig, load_dataset
from huggingface_hub import HfApi
from huggingface_hub.errors import RevisionNotFoundError

from eval_framework.tasks.base import Language
from eval_framework.tasks.benchmarks.winogrande import WINOGRANDE

# Answers are labelled "1" or "2"; map each label onto the zero-based index
# of the corresponding entry in the two-element choices list.
ANSWER_STR_TO_NUM: dict[str, int] = {label: index for index, label in enumerate(("1", "2"))}


class WINOX(WINOGRANDE):
    """Wino-X: parallel German, French, and Russian Winograd schemas.

    Wino-X is a parallel dataset of German, French, and Russian Winograd
    schemas, aligned with their English counterparts, used to examine whether
    neural machine translation models can perform coreference resolution that
    requires commonsense knowledge, and whether multilingual language models
    are capable of commonsense reasoning across multiple languages.

    Subclasses pick a language by setting ``LANGUAGE_SHORT_CODE``; it is used
    to build the per-language item keys ``context_<code>``,
    ``option1_<code>`` and ``option2_<code>``.

    Winogrande: https://arxiv.org/abs/1907.10641
    Wino-X: https://github.com/demelin/Wino-X
    Wino-X: https://huggingface.co/datasets/demelin/wino_x
    """

    DATASET_PATH = "demelin/wino_x"
    HF_REVISION = "7d82697fd52ac8b03e62aadfddc61077320f21e7"
    SAMPLE_SPLIT = "test"
    FEWSHOT_SPLIT = "test"
    LANGUAGE_SHORT_CODE = ""

    def _split_context(self, item: dict) -> tuple[str, str]:
        """Split the language-specific context around its ``_`` blank.

        Args:
            item: Dataset row containing ``context_<LANGUAGE_SHORT_CODE>``.

        Returns:
            The text before and the text after the blank, unmodified.

        Raises:
            ValueError: If the context does not contain exactly one ``_``.
        """
        context = item[f"context_{self.LANGUAGE_SHORT_CODE}"]
        parts = context.split("_")
        if len(parts) != 2:
            raise ValueError(
                f"Expected exactly one '_' placeholder in context, found {len(parts) - 1}: {context!r}"
            )
        before, after = parts
        return before, after

    def _get_ground_truth(self, item: dict[str, Any]) -> str | None:
        """Return the correct completion, prefixed with a single space.

        Args:
            item: Dataset row with an ``answer`` field and the option/context
                fields read by ``_extract_choices``.

        Returns:
            ``" "`` followed by the choice selected by ``item["answer"]``.
        """
        choices = self._extract_choices(item)
        # In Winogrande the answer is a string but in Wino-X it is an int, so
        # normalise through str() before looking up the choice index.
        answer_index = ANSWER_STR_TO_NUM[str(item["answer"])]
        return f" {choices[answer_index]}"

    def _extract_question(self, item: dict) -> str:
        """Return the part of the context that precedes the ``_`` blank.

        Doubled spaces are collapsed to one and surrounding whitespace is
        stripped.
        """
        question, _ = self._split_context(item)
        # Collapse doubled spaces into a single one.
        question = question.replace("  ", " ")
        return question.strip()

    def _extract_choices(self, item: dict) -> list[str]:
        """Return both options, each followed by the text after the blank.

        Returns:
            A two-element list: ``option1_<code>`` and ``option2_<code>``,
            each concatenated with the context suffix (doubled spaces in the
            suffix are collapsed to one).
        """
        _, choice_suffix = self._split_context(item)
        # Collapse doubled spaces into a single one.
        choice_suffix = choice_suffix.replace("  ", " ")
        option_keys = (
            f"option1_{self.LANGUAGE_SHORT_CODE}",
            f"option2_{self.LANGUAGE_SHORT_CODE}",
        )
        return [item[key] + choice_suffix for key in option_keys]

    def _load_hf_dataset(self, **kwargs: Any) -> Any:
        """Load the dataset from the Hugging Face Hub pinned to ``HF_REVISION``.

        When ``HF_REVISION`` is set, the revision is first looked up through
        ``HfApi.dataset_info``. A missing revision is re-raised; any other
        failure of that lookup is ignored and loading proceeds regardless.

        Args:
            **kwargs: ``path`` (defaults to ``DATASET_PATH``), ``name`` and
                ``split`` are forwarded to ``datasets.load_dataset``; other
                keys are ignored.

        Returns:
            Whatever ``datasets.load_dataset`` returns for the request.

        Raises:
            RevisionNotFoundError: If ``HF_REVISION`` does not exist for the
                dataset repository.
        """
        # Resolve the repository once so the revision check and the actual
        # load always target the same dataset, even when ``path`` is omitted.
        dataset_path = kwargs.get("path", self.DATASET_PATH)

        # Check that HF_REVISION is valid before loading the dataset.
        if self.HF_REVISION:
            try:
                HfApi().dataset_info(repo_id=dataset_path, revision=self.HF_REVISION, timeout=100.0)
            except RevisionNotFoundError:
                raise
            except Exception:
                # Deliberately best effort: only a missing revision is fatal
                # here; other lookup failures are left to load_dataset below.
                pass

        cache_dir: str = os.environ.get("HF_DATASET_CACHE_DIR", f"{Path.home()}/.cache/huggingface/datasets")
        download_config = DownloadConfig(cache_dir=cache_dir, max_retries=5)
        dataset = load_dataset(
            dataset_path,
            name=kwargs.get("name"),
            split=kwargs.get("split"),
            data_files=None,  # Let it auto-discover parquet files
            revision=self.HF_REVISION,
            cache_dir=cache_dir,
            download_config=download_config,
        )
        return dataset
class WINOX_DE(WINOX):
    """Wino-X task variant using the ``lm_en_de`` subject and ``de`` item keys."""

    NAME = "WINOX_DE"
    SUBJECTS = ["lm_en_de"]
    LANGUAGE = Language.DEU
    # Suffix of the per-language item keys (context_de, option1_de, option2_de).
    LANGUAGE_SHORT_CODE = "de"
class WINOX_FR(WINOX):
    """Wino-X task variant using the ``lm_en_fr`` subject and ``fr`` item keys."""

    NAME = "WINOX_FR"
    SUBJECTS = ["lm_en_fr"]
    LANGUAGE = Language.FRA
    # Suffix of the per-language item keys (context_fr, option1_fr, option2_fr).
    LANGUAGE_SHORT_CODE = "fr"