MMMLU

NAME = MMMLU
DATASET_PATH = openai/MMMLU
SAMPLE_SPLIT = test
FEWSHOT_SPLIT = test
RESPONSE_TYPE = LOGLIKELIHOODS
METRICS = [AccuracyLoglikelihood, AccuracyNormLoglikelihood]
SUBJECTS = [('FR_FR', 'abstract_algebra'), ('FR_FR', 'anatomy'), ('FR_FR', 'astronomy'), ('FR_FR', 'business_ethics'), ('FR_FR', 'clinical_knowledge'), ('FR_FR', 'college_biology'), ('FR_FR', 'college_chemistry'), ('FR_FR', 'college_computer_science'), ('FR_FR', 'college_mathematics'), ('FR_FR', 'college_medicine'), ('FR_FR', 'college_physics'), ('FR_FR', 'computer_security'), ('FR_FR', 'conceptual_physics'), ('FR_FR', 'econometrics'), ('FR_FR', 'electrical_engineering'), ('FR_FR', 'elementary_mathematics'), ('FR_FR', 'formal_logic'), ('FR_FR', 'global_facts'), ('FR_FR', 'high_school_biology'), ('FR_FR', 'high_school_chemistry'), ('FR_FR', 'high_school_computer_science'), ('FR_FR', 'high_school_european_history'), ('FR_FR', 'high_school_geography'), ('FR_FR', 'high_school_government_and_politics'), ('FR_FR', 'high_school_macroeconomics'), ('FR_FR', 'high_school_mathematics'), ('FR_FR', 'high_school_microeconomics'), ('FR_FR', 'high_school_physics'), ('FR_FR', 'high_school_psychology'), ('FR_FR', 'high_school_statistics'), ('FR_FR', 'high_school_us_history'), ('FR_FR', 'high_school_world_history'), ('FR_FR', 'human_aging'), ('FR_FR', 'human_sexuality'), ('FR_FR', 'international_law'), ('FR_FR', 'jurisprudence'), ('FR_FR', 'logical_fallacies'), ('FR_FR', 'machine_learning'), ('FR_FR', 'management'), ('FR_FR', 'marketing'), ('FR_FR', 'medical_genetics'), ('FR_FR', 'miscellaneous'), ('FR_FR', 'moral_disputes'), ('FR_FR', 'moral_scenarios'), ('FR_FR', 'nutrition'), ('FR_FR', 'philosophy'), ('FR_FR', 'prehistory'), ('FR_FR', 'professional_accounting'), ('FR_FR', 'professional_law'), ('FR_FR', 'professional_medicine'), ('FR_FR', 'professional_psychology'), ('FR_FR', 'public_relations'), ('FR_FR', 'security_studies'), ('FR_FR', 'sociology'), ('FR_FR', 'us_foreign_policy'), ('FR_FR', 'virology'), ('FR_FR', 'world_religions'), ('DE_DE', 'abstract_algebra'), ('DE_DE', 'anatomy'), ('DE_DE', 'astronomy'), ('DE_DE', 'business_ethics'), ('DE_DE', 'clinical_knowledge'), ('DE_DE', 
'college_biology'), ('DE_DE', 'college_chemistry'), ('DE_DE', 'college_computer_science'), ('DE_DE', 'college_mathematics'), ('DE_DE', 'college_medicine'), ('DE_DE', 'college_physics'), ('DE_DE', 'computer_security'), ('DE_DE', 'conceptual_physics'), ('DE_DE', 'econometrics'), ('DE_DE', 'electrical_engineering'), ('DE_DE', 'elementary_mathematics'), ('DE_DE', 'formal_logic'), ('DE_DE', 'global_facts'), ('DE_DE', 'high_school_biology'), ('DE_DE', 'high_school_chemistry'), ('DE_DE', 'high_school_computer_science'), ('DE_DE', 'high_school_european_history'), ('DE_DE', 'high_school_geography'), ('DE_DE', 'high_school_government_and_politics'), ('DE_DE', 'high_school_macroeconomics'), ('DE_DE', 'high_school_mathematics'), ('DE_DE', 'high_school_microeconomics'), ('DE_DE', 'high_school_physics'), ('DE_DE', 'high_school_psychology'), ('DE_DE', 'high_school_statistics'), ('DE_DE', 'high_school_us_history'), ('DE_DE', 'high_school_world_history'), ('DE_DE', 'human_aging'), ('DE_DE', 'human_sexuality'), ('DE_DE', 'international_law'), ('DE_DE', 'jurisprudence'), ('DE_DE', 'logical_fallacies'), ('DE_DE', 'machine_learning'), ('DE_DE', 'management'), ('DE_DE', 'marketing'), ('DE_DE', 'medical_genetics'), ('DE_DE', 'miscellaneous'), ('DE_DE', 'moral_disputes'), ('DE_DE', 'moral_scenarios'), ('DE_DE', 'nutrition'), ('DE_DE', 'philosophy'), ('DE_DE', 'prehistory'), ('DE_DE', 'professional_accounting'), ('DE_DE', 'professional_law'), ('DE_DE', 'professional_medicine'), ('DE_DE', 'professional_psychology'), ('DE_DE', 'public_relations'), ('DE_DE', 'security_studies'), ('DE_DE', 'sociology'), ('DE_DE', 'us_foreign_policy'), ('DE_DE', 'virology'), ('DE_DE', 'world_religions'), ('ES_LA', 'abstract_algebra'), ('ES_LA', 'anatomy'), ('ES_LA', 'astronomy'), ('ES_LA', 'business_ethics'), ('ES_LA', 'clinical_knowledge'), ('ES_LA', 'college_biology'), ('ES_LA', 'college_chemistry'), ('ES_LA', 'college_computer_science'), ('ES_LA', 'college_mathematics'), ('ES_LA', 'college_medicine'), 
('ES_LA', 'college_physics'), ('ES_LA', 'computer_security'), ('ES_LA', 'conceptual_physics'), ('ES_LA', 'econometrics'), ('ES_LA', 'electrical_engineering'), ('ES_LA', 'elementary_mathematics'), ('ES_LA', 'formal_logic'), ('ES_LA', 'global_facts'), ('ES_LA', 'high_school_biology'), ('ES_LA', 'high_school_chemistry'), ('ES_LA', 'high_school_computer_science'), ('ES_LA', 'high_school_european_history'), ('ES_LA', 'high_school_geography'), ('ES_LA', 'high_school_government_and_politics'), ('ES_LA', 'high_school_macroeconomics'), ('ES_LA', 'high_school_mathematics'), ('ES_LA', 'high_school_microeconomics'), ('ES_LA', 'high_school_physics'), ('ES_LA', 'high_school_psychology'), ('ES_LA', 'high_school_statistics'), ('ES_LA', 'high_school_us_history'), ('ES_LA', 'high_school_world_history'), ('ES_LA', 'human_aging'), ('ES_LA', 'human_sexuality'), ('ES_LA', 'international_law'), ('ES_LA', 'jurisprudence'), ('ES_LA', 'logical_fallacies'), ('ES_LA', 'machine_learning'), ('ES_LA', 'management'), ('ES_LA', 'marketing'), ('ES_LA', 'medical_genetics'), ('ES_LA', 'miscellaneous'), ('ES_LA', 'moral_disputes'), ('ES_LA', 'moral_scenarios'), ('ES_LA', 'nutrition'), ('ES_LA', 'philosophy'), ('ES_LA', 'prehistory'), ('ES_LA', 'professional_accounting'), ('ES_LA', 'professional_law'), ('ES_LA', 'professional_medicine'), ('ES_LA', 'professional_psychology'), ('ES_LA', 'public_relations'), ('ES_LA', 'security_studies'), ('ES_LA', 'sociology'), ('ES_LA', 'us_foreign_policy'), ('ES_LA', 'virology'), ('ES_LA', 'world_religions'), ('IT_IT', 'abstract_algebra'), ('IT_IT', 'anatomy'), ('IT_IT', 'astronomy'), ('IT_IT', 'business_ethics'), ('IT_IT', 'clinical_knowledge'), ('IT_IT', 'college_biology'), ('IT_IT', 'college_chemistry'), ('IT_IT', 'college_computer_science'), ('IT_IT', 'college_mathematics'), ('IT_IT', 'college_medicine'), ('IT_IT', 'college_physics'), ('IT_IT', 'computer_security'), ('IT_IT', 'conceptual_physics'), ('IT_IT', 'econometrics'), ('IT_IT', 'electrical_engineering'), 
('IT_IT', 'elementary_mathematics'), ('IT_IT', 'formal_logic'), ('IT_IT', 'global_facts'), ('IT_IT', 'high_school_biology'), ('IT_IT', 'high_school_chemistry'), ('IT_IT', 'high_school_computer_science'), ('IT_IT', 'high_school_european_history'), ('IT_IT', 'high_school_geography'), ('IT_IT', 'high_school_government_and_politics'), ('IT_IT', 'high_school_macroeconomics'), ('IT_IT', 'high_school_mathematics'), ('IT_IT', 'high_school_microeconomics'), ('IT_IT', 'high_school_physics'), ('IT_IT', 'high_school_psychology'), ('IT_IT', 'high_school_statistics'), ('IT_IT', 'high_school_us_history'), ('IT_IT', 'high_school_world_history'), ('IT_IT', 'human_aging'), ('IT_IT', 'human_sexuality'), ('IT_IT', 'international_law'), ('IT_IT', 'jurisprudence'), ('IT_IT', 'logical_fallacies'), ('IT_IT', 'machine_learning'), ('IT_IT', 'management'), ('IT_IT', 'marketing'), ('IT_IT', 'medical_genetics'), ('IT_IT', 'miscellaneous'), ('IT_IT', 'moral_disputes'), ('IT_IT', 'moral_scenarios'), ('IT_IT', 'nutrition'), ('IT_IT', 'philosophy'), ('IT_IT', 'prehistory'), ('IT_IT', 'professional_accounting'), ('IT_IT', 'professional_law'), ('IT_IT', 'professional_medicine'), ('IT_IT', 'professional_psychology'), ('IT_IT', 'public_relations'), ('IT_IT', 'security_studies'), ('IT_IT', 'sociology'), ('IT_IT', 'us_foreign_policy'), ('IT_IT', 'virology'), ('IT_IT', 'world_religions'), ('PT_BR', 'abstract_algebra'), ('PT_BR', 'anatomy'), ('PT_BR', 'astronomy'), ('PT_BR', 'business_ethics'), ('PT_BR', 'clinical_knowledge'), ('PT_BR', 'college_biology'), ('PT_BR', 'college_chemistry'), ('PT_BR', 'college_computer_science'), ('PT_BR', 'college_mathematics'), ('PT_BR', 'college_medicine'), ('PT_BR', 'college_physics'), ('PT_BR', 'computer_security'), ('PT_BR', 'conceptual_physics'), ('PT_BR', 'econometrics'), ('PT_BR', 'electrical_engineering'), ('PT_BR', 'elementary_mathematics'), ('PT_BR', 'formal_logic'), ('PT_BR', 'global_facts'), ('PT_BR', 'high_school_biology'), ('PT_BR', 'high_school_chemistry'), 
('PT_BR', 'high_school_computer_science'), ('PT_BR', 'high_school_european_history'), ('PT_BR', 'high_school_geography'), ('PT_BR', 'high_school_government_and_politics'), ('PT_BR', 'high_school_macroeconomics'), ('PT_BR', 'high_school_mathematics'), ('PT_BR', 'high_school_microeconomics'), ('PT_BR', 'high_school_physics'), ('PT_BR', 'high_school_psychology'), ('PT_BR', 'high_school_statistics'), ('PT_BR', 'high_school_us_history'), ('PT_BR', 'high_school_world_history'), ('PT_BR', 'human_aging'), ('PT_BR', 'human_sexuality'), ('PT_BR', 'international_law'), ('PT_BR', 'jurisprudence'), ('PT_BR', 'logical_fallacies'), ('PT_BR', 'machine_learning'), ('PT_BR', 'management'), ('PT_BR', 'marketing'), ('PT_BR', 'medical_genetics'), ('PT_BR', 'miscellaneous'), ('PT_BR', 'moral_disputes'), ('PT_BR', 'moral_scenarios'), ('PT_BR', 'nutrition'), ('PT_BR', 'philosophy'), ('PT_BR', 'prehistory'), ('PT_BR', 'professional_accounting'), ('PT_BR', 'professional_law'), ('PT_BR', 'professional_medicine'), ('PT_BR', 'professional_psychology'), ('PT_BR', 'public_relations'), ('PT_BR', 'security_studies'), ('PT_BR', 'sociology'), ('PT_BR', 'us_foreign_policy'), ('PT_BR', 'virology'), ('PT_BR', 'world_religions'), ('AR_XY', 'abstract_algebra'), ('AR_XY', 'anatomy'), ('AR_XY', 'astronomy'), ('AR_XY', 'business_ethics'), ('AR_XY', 'clinical_knowledge'), ('AR_XY', 'college_biology'), ('AR_XY', 'college_chemistry'), ('AR_XY', 'college_computer_science'), ('AR_XY', 'college_mathematics'), ('AR_XY', 'college_medicine'), ('AR_XY', 'college_physics'), ('AR_XY', 'computer_security'), ('AR_XY', 'conceptual_physics'), ('AR_XY', 'econometrics'), ('AR_XY', 'electrical_engineering'), ('AR_XY', 'elementary_mathematics'), ('AR_XY', 'formal_logic'), ('AR_XY', 'global_facts'), ('AR_XY', 'high_school_biology'), ('AR_XY', 'high_school_chemistry'), ('AR_XY', 'high_school_computer_science'), ('AR_XY', 'high_school_european_history'), ('AR_XY', 'high_school_geography'), ('AR_XY', 
'high_school_government_and_politics'), ('AR_XY', 'high_school_macroeconomics'), ('AR_XY', 'high_school_mathematics'), ('AR_XY', 'high_school_microeconomics'), ('AR_XY', 'high_school_physics'), ('AR_XY', 'high_school_psychology'), ('AR_XY', 'high_school_statistics'), ('AR_XY', 'high_school_us_history'), ('AR_XY', 'high_school_world_history'), ('AR_XY', 'human_aging'), ('AR_XY', 'human_sexuality'), ('AR_XY', 'international_law'), ('AR_XY', 'jurisprudence'), ('AR_XY', 'logical_fallacies'), ('AR_XY', 'machine_learning'), ('AR_XY', 'management'), ('AR_XY', 'marketing'), ('AR_XY', 'medical_genetics'), ('AR_XY', 'miscellaneous'), ('AR_XY', 'moral_disputes'), ('AR_XY', 'moral_scenarios'), ('AR_XY', 'nutrition'), ('AR_XY', 'philosophy'), ('AR_XY', 'prehistory'), ('AR_XY', 'professional_accounting'), ('AR_XY', 'professional_law'), ('AR_XY', 'professional_medicine'), ('AR_XY', 'professional_psychology'), ('AR_XY', 'public_relations'), ('AR_XY', 'security_studies'), ('AR_XY', 'sociology'), ('AR_XY', 'us_foreign_policy'), ('AR_XY', 'virology'), ('AR_XY', 'world_religions')]
LANGUAGE = {"('FR', 'abstract_algebra')": <Language.FRA: 'French'>, "('FR', 'anatomy')": <Language.FRA: 'French'>, "('FR', 'astronomy')": <Language.FRA: 'French'>, "('FR', 'business_ethics')": <Language.FRA: 'French'>, "('FR', 'clinical_knowledge')": <Language.FRA: 'French'>, "('FR', 'college_biology')": <Language.FRA: 'French'>, "('FR', 'college_chemistry')": <Language.FRA: 'French'>, "('FR', 'college_computer_science')": <Language.FRA: 'French'>, "('FR', 'college_mathematics')": <Language.FRA: 'French'>, "('FR', 'college_medicine')": <Language.FRA: 'French'>, "('FR', 'college_physics')": <Language.FRA: 'French'>, "('FR', 'computer_security')": <Language.FRA: 'French'>, "('FR', 'conceptual_physics')": <Language.FRA: 'French'>, "('FR', 'econometrics')": <Language.FRA: 'French'>, "('FR', 'electrical_engineering')": <Language.FRA: 'French'>, "('FR', 'elementary_mathematics')": <Language.FRA: 'French'>, "('FR', 'formal_logic')": <Language.FRA: 'French'>, "('FR', 'global_facts')": <Language.FRA: 'French'>, "('FR', 'high_school_biology')": <Language.FRA: 'French'>, "('FR', 'high_school_chemistry')": <Language.FRA: 'French'>, "('FR', 'high_school_computer_science')": <Language.FRA: 'French'>, "('FR', 'high_school_european_history')": <Language.FRA: 'French'>, "('FR', 'high_school_geography')": <Language.FRA: 'French'>, "('FR', 'high_school_government_and_politics')": <Language.FRA: 'French'>, "('FR', 'high_school_macroeconomics')": <Language.FRA: 'French'>, "('FR', 'high_school_mathematics')": <Language.FRA: 'French'>, "('FR', 'high_school_microeconomics')": <Language.FRA: 'French'>, "('FR', 'high_school_physics')": <Language.FRA: 'French'>, "('FR', 'high_school_psychology')": <Language.FRA: 'French'>, "('FR', 'high_school_statistics')": <Language.FRA: 'French'>, "('FR', 'high_school_us_history')": <Language.FRA: 'French'>, "('FR', 'high_school_world_history')": <Language.FRA: 'French'>, "('FR', 'human_aging')": <Language.FRA: 'French'>, "('FR', 'human_sexuality')": 
<Language.FRA: 'French'>, "('FR', 'international_law')": <Language.FRA: 'French'>, "('FR', 'jurisprudence')": <Language.FRA: 'French'>, "('FR', 'logical_fallacies')": <Language.FRA: 'French'>, "('FR', 'machine_learning')": <Language.FRA: 'French'>, "('FR', 'management')": <Language.FRA: 'French'>, "('FR', 'marketing')": <Language.FRA: 'French'>, "('FR', 'medical_genetics')": <Language.FRA: 'French'>, "('FR', 'miscellaneous')": <Language.FRA: 'French'>, "('FR', 'moral_disputes')": <Language.FRA: 'French'>, "('FR', 'moral_scenarios')": <Language.FRA: 'French'>, "('FR', 'nutrition')": <Language.FRA: 'French'>, "('FR', 'philosophy')": <Language.FRA: 'French'>, "('FR', 'prehistory')": <Language.FRA: 'French'>, "('FR', 'professional_accounting')": <Language.FRA: 'French'>, "('FR', 'professional_law')": <Language.FRA: 'French'>, "('FR', 'professional_medicine')": <Language.FRA: 'French'>, "('FR', 'professional_psychology')": <Language.FRA: 'French'>, "('FR', 'public_relations')": <Language.FRA: 'French'>, "('FR', 'security_studies')": <Language.FRA: 'French'>, "('FR', 'sociology')": <Language.FRA: 'French'>, "('FR', 'us_foreign_policy')": <Language.FRA: 'French'>, "('FR', 'virology')": <Language.FRA: 'French'>, "('FR', 'world_religions')": <Language.FRA: 'French'>, "('DE', 'abstract_algebra')": <Language.DEU: 'German'>, "('DE', 'anatomy')": <Language.DEU: 'German'>, "('DE', 'astronomy')": <Language.DEU: 'German'>, "('DE', 'business_ethics')": <Language.DEU: 'German'>, "('DE', 'clinical_knowledge')": <Language.DEU: 'German'>, "('DE', 'college_biology')": <Language.DEU: 'German'>, "('DE', 'college_chemistry')": <Language.DEU: 'German'>, "('DE', 'college_computer_science')": <Language.DEU: 'German'>, "('DE', 'college_mathematics')": <Language.DEU: 'German'>, "('DE', 'college_medicine')": <Language.DEU: 'German'>, "('DE', 'college_physics')": <Language.DEU: 'German'>, "('DE', 'computer_security')": <Language.DEU: 'German'>, "('DE', 'conceptual_physics')": <Language.DEU: 
'German'>, "('DE', 'econometrics')": <Language.DEU: 'German'>, "('DE', 'electrical_engineering')": <Language.DEU: 'German'>, "('DE', 'elementary_mathematics')": <Language.DEU: 'German'>, "('DE', 'formal_logic')": <Language.DEU: 'German'>, "('DE', 'global_facts')": <Language.DEU: 'German'>, "('DE', 'high_school_biology')": <Language.DEU: 'German'>, "('DE', 'high_school_chemistry')": <Language.DEU: 'German'>, "('DE', 'high_school_computer_science')": <Language.DEU: 'German'>, "('DE', 'high_school_european_history')": <Language.DEU: 'German'>, "('DE', 'high_school_geography')": <Language.DEU: 'German'>, "('DE', 'high_school_government_and_politics')": <Language.DEU: 'German'>, "('DE', 'high_school_macroeconomics')": <Language.DEU: 'German'>, "('DE', 'high_school_mathematics')": <Language.DEU: 'German'>, "('DE', 'high_school_microeconomics')": <Language.DEU: 'German'>, "('DE', 'high_school_physics')": <Language.DEU: 'German'>, "('DE', 'high_school_psychology')": <Language.DEU: 'German'>, "('DE', 'high_school_statistics')": <Language.DEU: 'German'>, "('DE', 'high_school_us_history')": <Language.DEU: 'German'>, "('DE', 'high_school_world_history')": <Language.DEU: 'German'>, "('DE', 'human_aging')": <Language.DEU: 'German'>, "('DE', 'human_sexuality')": <Language.DEU: 'German'>, "('DE', 'international_law')": <Language.DEU: 'German'>, "('DE', 'jurisprudence')": <Language.DEU: 'German'>, "('DE', 'logical_fallacies')": <Language.DEU: 'German'>, "('DE', 'machine_learning')": <Language.DEU: 'German'>, "('DE', 'management')": <Language.DEU: 'German'>, "('DE', 'marketing')": <Language.DEU: 'German'>, "('DE', 'medical_genetics')": <Language.DEU: 'German'>, "('DE', 'miscellaneous')": <Language.DEU: 'German'>, "('DE', 'moral_disputes')": <Language.DEU: 'German'>, "('DE', 'moral_scenarios')": <Language.DEU: 'German'>, "('DE', 'nutrition')": <Language.DEU: 'German'>, "('DE', 'philosophy')": <Language.DEU: 'German'>, "('DE', 'prehistory')": <Language.DEU: 'German'>, "('DE', 
'professional_accounting')": <Language.DEU: 'German'>, "('DE', 'professional_law')": <Language.DEU: 'German'>, "('DE', 'professional_medicine')": <Language.DEU: 'German'>, "('DE', 'professional_psychology')": <Language.DEU: 'German'>, "('DE', 'public_relations')": <Language.DEU: 'German'>, "('DE', 'security_studies')": <Language.DEU: 'German'>, "('DE', 'sociology')": <Language.DEU: 'German'>, "('DE', 'us_foreign_policy')": <Language.DEU: 'German'>, "('DE', 'virology')": <Language.DEU: 'German'>, "('DE', 'world_religions')": <Language.DEU: 'German'>, "('ES', 'abstract_algebra')": <Language.SPA: 'Spanish'>, "('ES', 'anatomy')": <Language.SPA: 'Spanish'>, "('ES', 'astronomy')": <Language.SPA: 'Spanish'>, "('ES', 'business_ethics')": <Language.SPA: 'Spanish'>, "('ES', 'clinical_knowledge')": <Language.SPA: 'Spanish'>, "('ES', 'college_biology')": <Language.SPA: 'Spanish'>, "('ES', 'college_chemistry')": <Language.SPA: 'Spanish'>, "('ES', 'college_computer_science')": <Language.SPA: 'Spanish'>, "('ES', 'college_mathematics')": <Language.SPA: 'Spanish'>, "('ES', 'college_medicine')": <Language.SPA: 'Spanish'>, "('ES', 'college_physics')": <Language.SPA: 'Spanish'>, "('ES', 'computer_security')": <Language.SPA: 'Spanish'>, "('ES', 'conceptual_physics')": <Language.SPA: 'Spanish'>, "('ES', 'econometrics')": <Language.SPA: 'Spanish'>, "('ES', 'electrical_engineering')": <Language.SPA: 'Spanish'>, "('ES', 'elementary_mathematics')": <Language.SPA: 'Spanish'>, "('ES', 'formal_logic')": <Language.SPA: 'Spanish'>, "('ES', 'global_facts')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_biology')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_chemistry')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_computer_science')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_european_history')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_geography')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_government_and_politics')": <Language.SPA: 'Spanish'>, "('ES', 
'high_school_macroeconomics')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_mathematics')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_microeconomics')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_physics')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_psychology')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_statistics')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_us_history')": <Language.SPA: 'Spanish'>, "('ES', 'high_school_world_history')": <Language.SPA: 'Spanish'>, "('ES', 'human_aging')": <Language.SPA: 'Spanish'>, "('ES', 'human_sexuality')": <Language.SPA: 'Spanish'>, "('ES', 'international_law')": <Language.SPA: 'Spanish'>, "('ES', 'jurisprudence')": <Language.SPA: 'Spanish'>, "('ES', 'logical_fallacies')": <Language.SPA: 'Spanish'>, "('ES', 'machine_learning')": <Language.SPA: 'Spanish'>, "('ES', 'management')": <Language.SPA: 'Spanish'>, "('ES', 'marketing')": <Language.SPA: 'Spanish'>, "('ES', 'medical_genetics')": <Language.SPA: 'Spanish'>, "('ES', 'miscellaneous')": <Language.SPA: 'Spanish'>, "('ES', 'moral_disputes')": <Language.SPA: 'Spanish'>, "('ES', 'moral_scenarios')": <Language.SPA: 'Spanish'>, "('ES', 'nutrition')": <Language.SPA: 'Spanish'>, "('ES', 'philosophy')": <Language.SPA: 'Spanish'>, "('ES', 'prehistory')": <Language.SPA: 'Spanish'>, "('ES', 'professional_accounting')": <Language.SPA: 'Spanish'>, "('ES', 'professional_law')": <Language.SPA: 'Spanish'>, "('ES', 'professional_medicine')": <Language.SPA: 'Spanish'>, "('ES', 'professional_psychology')": <Language.SPA: 'Spanish'>, "('ES', 'public_relations')": <Language.SPA: 'Spanish'>, "('ES', 'security_studies')": <Language.SPA: 'Spanish'>, "('ES', 'sociology')": <Language.SPA: 'Spanish'>, "('ES', 'us_foreign_policy')": <Language.SPA: 'Spanish'>, "('ES', 'virology')": <Language.SPA: 'Spanish'>, "('ES', 'world_religions')": <Language.SPA: 'Spanish'>, "('IT', 'abstract_algebra')": <Language.ITA: 'Italian'>, "('IT', 'anatomy')": <Language.ITA: 'Italian'>, 
"('IT', 'astronomy')": <Language.ITA: 'Italian'>, "('IT', 'business_ethics')": <Language.ITA: 'Italian'>, "('IT', 'clinical_knowledge')": <Language.ITA: 'Italian'>, "('IT', 'college_biology')": <Language.ITA: 'Italian'>, "('IT', 'college_chemistry')": <Language.ITA: 'Italian'>, "('IT', 'college_computer_science')": <Language.ITA: 'Italian'>, "('IT', 'college_mathematics')": <Language.ITA: 'Italian'>, "('IT', 'college_medicine')": <Language.ITA: 'Italian'>, "('IT', 'college_physics')": <Language.ITA: 'Italian'>, "('IT', 'computer_security')": <Language.ITA: 'Italian'>, "('IT', 'conceptual_physics')": <Language.ITA: 'Italian'>, "('IT', 'econometrics')": <Language.ITA: 'Italian'>, "('IT', 'electrical_engineering')": <Language.ITA: 'Italian'>, "('IT', 'elementary_mathematics')": <Language.ITA: 'Italian'>, "('IT', 'formal_logic')": <Language.ITA: 'Italian'>, "('IT', 'global_facts')": <Language.ITA: 'Italian'>, "('IT', 'high_school_biology')": <Language.ITA: 'Italian'>, "('IT', 'high_school_chemistry')": <Language.ITA: 'Italian'>, "('IT', 'high_school_computer_science')": <Language.ITA: 'Italian'>, "('IT', 'high_school_european_history')": <Language.ITA: 'Italian'>, "('IT', 'high_school_geography')": <Language.ITA: 'Italian'>, "('IT', 'high_school_government_and_politics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_macroeconomics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_mathematics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_microeconomics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_physics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_psychology')": <Language.ITA: 'Italian'>, "('IT', 'high_school_statistics')": <Language.ITA: 'Italian'>, "('IT', 'high_school_us_history')": <Language.ITA: 'Italian'>, "('IT', 'high_school_world_history')": <Language.ITA: 'Italian'>, "('IT', 'human_aging')": <Language.ITA: 'Italian'>, "('IT', 'human_sexuality')": <Language.ITA: 'Italian'>, "('IT', 'international_law')": <Language.ITA: 'Italian'>, 
"('IT', 'jurisprudence')": <Language.ITA: 'Italian'>, "('IT', 'logical_fallacies')": <Language.ITA: 'Italian'>, "('IT', 'machine_learning')": <Language.ITA: 'Italian'>, "('IT', 'management')": <Language.ITA: 'Italian'>, "('IT', 'marketing')": <Language.ITA: 'Italian'>, "('IT', 'medical_genetics')": <Language.ITA: 'Italian'>, "('IT', 'miscellaneous')": <Language.ITA: 'Italian'>, "('IT', 'moral_disputes')": <Language.ITA: 'Italian'>, "('IT', 'moral_scenarios')": <Language.ITA: 'Italian'>, "('IT', 'nutrition')": <Language.ITA: 'Italian'>, "('IT', 'philosophy')": <Language.ITA: 'Italian'>, "('IT', 'prehistory')": <Language.ITA: 'Italian'>, "('IT', 'professional_accounting')": <Language.ITA: 'Italian'>, "('IT', 'professional_law')": <Language.ITA: 'Italian'>, "('IT', 'professional_medicine')": <Language.ITA: 'Italian'>, "('IT', 'professional_psychology')": <Language.ITA: 'Italian'>, "('IT', 'public_relations')": <Language.ITA: 'Italian'>, "('IT', 'security_studies')": <Language.ITA: 'Italian'>, "('IT', 'sociology')": <Language.ITA: 'Italian'>, "('IT', 'us_foreign_policy')": <Language.ITA: 'Italian'>, "('IT', 'virology')": <Language.ITA: 'Italian'>, "('IT', 'world_religions')": <Language.ITA: 'Italian'>, "('PT', 'abstract_algebra')": <Language.POR: 'Portuguese'>, "('PT', 'anatomy')": <Language.POR: 'Portuguese'>, "('PT', 'astronomy')": <Language.POR: 'Portuguese'>, "('PT', 'business_ethics')": <Language.POR: 'Portuguese'>, "('PT', 'clinical_knowledge')": <Language.POR: 'Portuguese'>, "('PT', 'college_biology')": <Language.POR: 'Portuguese'>, "('PT', 'college_chemistry')": <Language.POR: 'Portuguese'>, "('PT', 'college_computer_science')": <Language.POR: 'Portuguese'>, "('PT', 'college_mathematics')": <Language.POR: 'Portuguese'>, "('PT', 'college_medicine')": <Language.POR: 'Portuguese'>, "('PT', 'college_physics')": <Language.POR: 'Portuguese'>, "('PT', 'computer_security')": <Language.POR: 'Portuguese'>, "('PT', 'conceptual_physics')": <Language.POR: 'Portuguese'>, 
"('PT', 'econometrics')": <Language.POR: 'Portuguese'>, "('PT', 'electrical_engineering')": <Language.POR: 'Portuguese'>, "('PT', 'elementary_mathematics')": <Language.POR: 'Portuguese'>, "('PT', 'formal_logic')": <Language.POR: 'Portuguese'>, "('PT', 'global_facts')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_biology')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_chemistry')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_computer_science')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_european_history')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_geography')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_government_and_politics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_macroeconomics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_mathematics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_microeconomics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_physics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_psychology')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_statistics')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_us_history')": <Language.POR: 'Portuguese'>, "('PT', 'high_school_world_history')": <Language.POR: 'Portuguese'>, "('PT', 'human_aging')": <Language.POR: 'Portuguese'>, "('PT', 'human_sexuality')": <Language.POR: 'Portuguese'>, "('PT', 'international_law')": <Language.POR: 'Portuguese'>, "('PT', 'jurisprudence')": <Language.POR: 'Portuguese'>, "('PT', 'logical_fallacies')": <Language.POR: 'Portuguese'>, "('PT', 'machine_learning')": <Language.POR: 'Portuguese'>, "('PT', 'management')": <Language.POR: 'Portuguese'>, "('PT', 'marketing')": <Language.POR: 'Portuguese'>, "('PT', 'medical_genetics')": <Language.POR: 'Portuguese'>, "('PT', 'miscellaneous')": <Language.POR: 'Portuguese'>, "('PT', 'moral_disputes')": <Language.POR: 'Portuguese'>, "('PT', 'moral_scenarios')": <Language.POR: 'Portuguese'>, "('PT', 'nutrition')": <Language.POR: 'Portuguese'>, 
"('PT', 'philosophy')": <Language.POR: 'Portuguese'>, "('PT', 'prehistory')": <Language.POR: 'Portuguese'>, "('PT', 'professional_accounting')": <Language.POR: 'Portuguese'>, "('PT', 'professional_law')": <Language.POR: 'Portuguese'>, "('PT', 'professional_medicine')": <Language.POR: 'Portuguese'>, "('PT', 'professional_psychology')": <Language.POR: 'Portuguese'>, "('PT', 'public_relations')": <Language.POR: 'Portuguese'>, "('PT', 'security_studies')": <Language.POR: 'Portuguese'>, "('PT', 'sociology')": <Language.POR: 'Portuguese'>, "('PT', 'us_foreign_policy')": <Language.POR: 'Portuguese'>, "('PT', 'virology')": <Language.POR: 'Portuguese'>, "('PT', 'world_religions')": <Language.POR: 'Portuguese'>, "('AR', 'abstract_algebra')": <Language.ARB: 'Arabic'>, "('AR', 'anatomy')": <Language.ARB: 'Arabic'>, "('AR', 'astronomy')": <Language.ARB: 'Arabic'>, "('AR', 'business_ethics')": <Language.ARB: 'Arabic'>, "('AR', 'clinical_knowledge')": <Language.ARB: 'Arabic'>, "('AR', 'college_biology')": <Language.ARB: 'Arabic'>, "('AR', 'college_chemistry')": <Language.ARB: 'Arabic'>, "('AR', 'college_computer_science')": <Language.ARB: 'Arabic'>, "('AR', 'college_mathematics')": <Language.ARB: 'Arabic'>, "('AR', 'college_medicine')": <Language.ARB: 'Arabic'>, "('AR', 'college_physics')": <Language.ARB: 'Arabic'>, "('AR', 'computer_security')": <Language.ARB: 'Arabic'>, "('AR', 'conceptual_physics')": <Language.ARB: 'Arabic'>, "('AR', 'econometrics')": <Language.ARB: 'Arabic'>, "('AR', 'electrical_engineering')": <Language.ARB: 'Arabic'>, "('AR', 'elementary_mathematics')": <Language.ARB: 'Arabic'>, "('AR', 'formal_logic')": <Language.ARB: 'Arabic'>, "('AR', 'global_facts')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_biology')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_chemistry')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_computer_science')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_european_history')": <Language.ARB: 'Arabic'>, "('AR', 
'high_school_geography')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_government_and_politics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_macroeconomics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_mathematics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_microeconomics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_physics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_psychology')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_statistics')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_us_history')": <Language.ARB: 'Arabic'>, "('AR', 'high_school_world_history')": <Language.ARB: 'Arabic'>, "('AR', 'human_aging')": <Language.ARB: 'Arabic'>, "('AR', 'human_sexuality')": <Language.ARB: 'Arabic'>, "('AR', 'international_law')": <Language.ARB: 'Arabic'>, "('AR', 'jurisprudence')": <Language.ARB: 'Arabic'>, "('AR', 'logical_fallacies')": <Language.ARB: 'Arabic'>, "('AR', 'machine_learning')": <Language.ARB: 'Arabic'>, "('AR', 'management')": <Language.ARB: 'Arabic'>, "('AR', 'marketing')": <Language.ARB: 'Arabic'>, "('AR', 'medical_genetics')": <Language.ARB: 'Arabic'>, "('AR', 'miscellaneous')": <Language.ARB: 'Arabic'>, "('AR', 'moral_disputes')": <Language.ARB: 'Arabic'>, "('AR', 'moral_scenarios')": <Language.ARB: 'Arabic'>, "('AR', 'nutrition')": <Language.ARB: 'Arabic'>, "('AR', 'philosophy')": <Language.ARB: 'Arabic'>, "('AR', 'prehistory')": <Language.ARB: 'Arabic'>, "('AR', 'professional_accounting')": <Language.ARB: 'Arabic'>, "('AR', 'professional_law')": <Language.ARB: 'Arabic'>, "('AR', 'professional_medicine')": <Language.ARB: 'Arabic'>, "('AR', 'professional_psychology')": <Language.ARB: 'Arabic'>, "('AR', 'public_relations')": <Language.ARB: 'Arabic'>, "('AR', 'security_studies')": <Language.ARB: 'Arabic'>, "('AR', 'sociology')": <Language.ARB: 'Arabic'>, "('AR', 'us_foreign_policy')": <Language.ARB: 'Arabic'>, "('AR', 'virology')": <Language.ARB: 'Arabic'>, "('AR', 'world_religions')": <Language.ARB: 'Arabic'>}

More detailed documentation, with prompt examples and ground truth completions, can be generated with the following command: `uv run -m eval_framework.utils.generate_task_docs --add-prompt-examples --only-tasks "MMMLU"`