MMMLU_GERMAN_COT

NAME = MMMLU_GERMAN_COT
DATASET_PATH = openai/MMMLU
SAMPLE_SPLIT = test
FEWSHOT_SPLIT = test
RESPONSE_TYPE = COMPLETION
METRICS = [AccuracyCompletion, GermanCompletionChecker]
SUBJECTS = [('DE_DE', 'abstract_algebra'), ('DE_DE', 'anatomy'), ('DE_DE', 'astronomy'), ('DE_DE', 'business_ethics'), ('DE_DE', 'clinical_knowledge'), ('DE_DE', 'college_biology'), ('DE_DE', 'college_chemistry'), ('DE_DE', 'college_computer_science'), ('DE_DE', 'college_mathematics'), ('DE_DE', 'college_medicine'), ('DE_DE', 'college_physics'), ('DE_DE', 'computer_security'), ('DE_DE', 'conceptual_physics'), ('DE_DE', 'econometrics'), ('DE_DE', 'electrical_engineering'), ('DE_DE', 'elementary_mathematics'), ('DE_DE', 'formal_logic'), ('DE_DE', 'global_facts'), ('DE_DE', 'high_school_biology'), ('DE_DE', 'high_school_chemistry'), ('DE_DE', 'high_school_computer_science'), ('DE_DE', 'high_school_european_history'), ('DE_DE', 'high_school_geography'), ('DE_DE', 'high_school_government_and_politics'), ('DE_DE', 'high_school_macroeconomics'), ('DE_DE', 'high_school_mathematics'), ('DE_DE', 'high_school_microeconomics'), ('DE_DE', 'high_school_physics'), ('DE_DE', 'high_school_psychology'), ('DE_DE', 'high_school_statistics'), ('DE_DE', 'high_school_us_history'), ('DE_DE', 'high_school_world_history'), ('DE_DE', 'human_aging'), ('DE_DE', 'human_sexuality'), ('DE_DE', 'international_law'), ('DE_DE', 'jurisprudence'), ('DE_DE', 'logical_fallacies'), ('DE_DE', 'machine_learning'), ('DE_DE', 'management'), ('DE_DE', 'marketing'), ('DE_DE', 'medical_genetics'), ('DE_DE', 'miscellaneous'), ('DE_DE', 'moral_disputes'), ('DE_DE', 'moral_scenarios'), ('DE_DE', 'nutrition'), ('DE_DE', 'philosophy'), ('DE_DE', 'prehistory'), ('DE_DE', 'professional_accounting'), ('DE_DE', 'professional_law'), ('DE_DE', 'professional_medicine'), ('DE_DE', 'professional_psychology'), ('DE_DE', 'public_relations'), ('DE_DE', 'security_studies'), ('DE_DE', 'sociology'), ('DE_DE', 'us_foreign_policy'), ('DE_DE', 'virology'), ('DE_DE', 'world_religions')]
LANGUAGE = {"('de', 'abstract_algebra')": <Language.DEU: 'German'>, "('de', 'anatomy')": <Language.DEU: 'German'>, "('de', 'astronomy')": <Language.DEU: 'German'>, "('de', 'business_ethics')": <Language.DEU: 'German'>, "('de', 'clinical_knowledge')": <Language.DEU: 'German'>, "('de', 'college_biology')": <Language.DEU: 'German'>, "('de', 'college_chemistry')": <Language.DEU: 'German'>, "('de', 'college_computer_science')": <Language.DEU: 'German'>, "('de', 'college_mathematics')": <Language.DEU: 'German'>, "('de', 'college_medicine')": <Language.DEU: 'German'>, "('de', 'college_physics')": <Language.DEU: 'German'>, "('de', 'computer_security')": <Language.DEU: 'German'>, "('de', 'conceptual_physics')": <Language.DEU: 'German'>, "('de', 'econometrics')": <Language.DEU: 'German'>, "('de', 'electrical_engineering')": <Language.DEU: 'German'>, "('de', 'elementary_mathematics')": <Language.DEU: 'German'>, "('de', 'formal_logic')": <Language.DEU: 'German'>, "('de', 'global_facts')": <Language.DEU: 'German'>, "('de', 'high_school_biology')": <Language.DEU: 'German'>, "('de', 'high_school_chemistry')": <Language.DEU: 'German'>, "('de', 'high_school_computer_science')": <Language.DEU: 'German'>, "('de', 'high_school_european_history')": <Language.DEU: 'German'>, "('de', 'high_school_geography')": <Language.DEU: 'German'>, "('de', 'high_school_government_and_politics')": <Language.DEU: 'German'>, "('de', 'high_school_macroeconomics')": <Language.DEU: 'German'>, "('de', 'high_school_mathematics')": <Language.DEU: 'German'>, "('de', 'high_school_microeconomics')": <Language.DEU: 'German'>, "('de', 'high_school_physics')": <Language.DEU: 'German'>, "('de', 'high_school_psychology')": <Language.DEU: 'German'>, "('de', 'high_school_statistics')": <Language.DEU: 'German'>, "('de', 'high_school_us_history')": <Language.DEU: 'German'>, "('de', 'high_school_world_history')": <Language.DEU: 'German'>, "('de', 'human_aging')": <Language.DEU: 'German'>, "('de', 'human_sexuality')": 
<Language.DEU: 'German'>, "('de', 'international_law')": <Language.DEU: 'German'>, "('de', 'jurisprudence')": <Language.DEU: 'German'>, "('de', 'logical_fallacies')": <Language.DEU: 'German'>, "('de', 'machine_learning')": <Language.DEU: 'German'>, "('de', 'management')": <Language.DEU: 'German'>, "('de', 'marketing')": <Language.DEU: 'German'>, "('de', 'medical_genetics')": <Language.DEU: 'German'>, "('de', 'miscellaneous')": <Language.DEU: 'German'>, "('de', 'moral_disputes')": <Language.DEU: 'German'>, "('de', 'moral_scenarios')": <Language.DEU: 'German'>, "('de', 'nutrition')": <Language.DEU: 'German'>, "('de', 'philosophy')": <Language.DEU: 'German'>, "('de', 'prehistory')": <Language.DEU: 'German'>, "('de', 'professional_accounting')": <Language.DEU: 'German'>, "('de', 'professional_law')": <Language.DEU: 'German'>, "('de', 'professional_medicine')": <Language.DEU: 'German'>, "('de', 'professional_psychology')": <Language.DEU: 'German'>, "('de', 'public_relations')": <Language.DEU: 'German'>, "('de', 'security_studies')": <Language.DEU: 'German'>, "('de', 'sociology')": <Language.DEU: 'German'>, "('de', 'us_foreign_policy')": <Language.DEU: 'German'>, "('de', 'virology')": <Language.DEU: 'German'>, "('de', 'world_religions')": <Language.DEU: 'German'>}

More detailed documentation, with prompt examples and ground truth completions, can be generated with `uv run -m eval_framework.utils.generate_task_docs --add-prompt-examples --only-tasks "MMMLU_GERMAN_COT"`.