# Source code for eval_framework.tasks.benchmarks.sphyr

from typing import Any

from eval_framework.metrics.completion.grid_difference import GridDifference
from eval_framework.tasks.base import BaseTask, Language, ResponseType

# Subject names exposed by the SPHYR task: each pattern below is paired with an
# "easy" and a "hard" suffix, with all easy variants listed before the hard ones.
SUBJECTS = [
    f"{pattern}_{difficulty}"
    for difficulty in ("easy", "hard")
    for pattern in (
        "1_random_cell",
        "5_random_cell",
        "10_random_cell",
        "1_random_row",
        "3_random_row",
        "1_random_column",
        "3_random_column",
        "full",
    )
]

# System prompt template describing the grid cell states and the fill-in task.
# The {FILL_INSTRUCTION} placeholder is substituted with str.format() in
# SPHYR._get_system_prompt_text.
SYSTEM_PROMPT = """You are given a structural material distribution represented as a grid. Each cell can have one of the following states:
- 'L' indicates applied load.
- 'V' indicates void.
- 'S' indicates support.

The goal is to predict the correct material distribution by filling in all {FILL_INSTRUCTION}, based on the surrounding structure and implicit physical reasoning (such as load paths, supports, and forces).

Important: The completed structure should use as little material as possible while remaining stable and plausible for carrying the applied forces. Minimize material usage unless necessary for structural support."""  # noqa: E501

# Instruction template for a single sample. {GRID} receives the stringified
# input grid and {FILL_INSTRUCTION} the replacement wording; both are filled in
# with str.format() by SPHYR._get_instruction_text.
PROMPT_TEMPLATE = """Below is the input grid with masked regions:

{GRID}

Please output the completed grid by replacing all {FILL_INSTRUCTION}.
Maintain the same format as the input: one row per line, cells separated by spaces, and the total number of rows and columns unchanged.
Return only the completed grid without any additional explanation."""  # noqa: E501

# Wording inserted into the prompts when the subject name contains "easy":
# 'V' cells are to be replaced by '1' or '0'.
EASY_FILL_INSTRUCTION = "'V' cells with either '1' (solid) or '0' (empty)"

# Wording inserted into the prompts for every other subject:
# 'V' cells are to be replaced by a one-decimal number between 0 and 1.
HARD_FILL_INSTRUCTION = (
    "'V' cells with a floating point number between 0 and 1, "
    "with one decimal place (e.g., 0.0, 0.1, 0.2, ..., 1.0)"
)


class SPHYR(BaseTask[str]):
    """SPhyR dataset: https://huggingface.co/datasets/philippds/SPhyR

    Zero-shot completion task: the model receives a grid with masked cells and
    must return the completed grid, which is scored with ``GridDifference``.
    The wording of the fill instruction depends on whether the sample's
    subject name contains ``"easy"``.
    """

    NAME = "SPHYR"
    DATASET_PATH = "philippds/SPhyR"
    SAMPLE_SPLIT = "test"
    FEWSHOT_SPLIT = ""  # few-shot prompting is rejected in __init__
    RESPONSE_TYPE = ResponseType.COMPLETION
    METRICS = [GridDifference]
    SUBJECTS = SUBJECTS
    PERTURBATION_UNMODIFIABLE_WORDS = None
    LANGUAGE = Language.ENG

    def __init__(self, num_fewshot: int = 0) -> None:
        """Create the task.

        Args:
            num_fewshot: Number of few-shot examples; must be 0.

        Raises:
            AssertionError: If ``num_fewshot`` is not 0.
        """
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is not stripped when Python runs with -O; the exception type
        # and message are the same as before.
        if num_fewshot != 0:
            raise AssertionError("Fewshot is not supported for SPHYR")
        super().__init__(num_fewshot)

    @staticmethod
    def _select_fill_instruction(item: dict[str, Any]) -> str:
        """Return the fill-instruction wording matching ``item["subject"]``."""
        if "easy" in item["subject"]:
            return EASY_FILL_INSTRUCTION
        return HARD_FILL_INSTRUCTION

    def _grid_to_str(self, grid: list[list[str]]) -> str:
        """Render ``grid`` as text: one row per line, cells separated by one space."""
        return "\n".join(" ".join(str(cell) for cell in row) for row in grid)

    def _get_system_prompt_text(self, item: dict[str, Any]) -> str | None:
        """Return ``SYSTEM_PROMPT`` with the subject-specific fill instruction."""
        return SYSTEM_PROMPT.format(FILL_INSTRUCTION=self._select_fill_instruction(item))

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return ``PROMPT_TEMPLATE`` filled with the input grid and fill instruction."""
        return PROMPT_TEMPLATE.format(
            GRID=self._grid_to_str(item["input_grid"]),
            FILL_INSTRUCTION=self._select_fill_instruction(item),
        )

    def _get_ground_truth(self, item: dict[str, Any]) -> str | None:
        """Return ``item["ground_truth"]`` rendered in the same text format as the input grid."""
        return self._grid_to_str(item["ground_truth"])