# Source code for eval_framework.tasks.benchmarks.sphyr

from typing import Any

from eval_framework.metrics.completion.grid_difference import GridDifference
from eval_framework.tasks.base import BaseTask, Language, ResponseType

# Subject names have the form "<pattern>_<difficulty>": all eight patterns with
# the "easy" suffix first, followed by the same eight with the "hard" suffix.
SUBJECTS = [
    f"{pattern}_{difficulty}"
    for difficulty in ("easy", "hard")
    for pattern in (
        "1_random_cell",
        "5_random_cell",
        "10_random_cell",
        "1_random_row",
        "3_random_row",
        "1_random_column",
        "3_random_column",
        "full",
    )
]

# System prompt describing the task. The single placeholder {FILL_INSTRUCTION}
# is filled via str.format() in SPHYR._get_system_prompt_text with either
# EASY_FILL_INSTRUCTION or HARD_FILL_INSTRUCTION.
SYSTEM_PROMPT = """You are given a structural material distribution represented as a grid. Each cell can have one of the following states:
- 'L' indicates applied load.
- 'V' indicates void.
- 'S' indicates support.

The goal is to predict the correct material distribution by filling in all {FILL_INSTRUCTION}, based on the surrounding structure and implicit physical reasoning (such as load paths, supports, and forces).

Important: The completed structure should use as little material as possible while remaining stable and plausible for carrying the applied forces. Minimize material usage unless necessary for structural support."""  # noqa: E501

# Per-item instruction. {GRID} receives the space/newline-joined input grid and
# {FILL_INSTRUCTION} the same difficulty-specific text used in the system
# prompt; both are filled via str.format() in SPHYR._get_instruction_text.
PROMPT_TEMPLATE = """Below is the input grid with masked regions:

{GRID}

Please output the completed grid by replacing all {FILL_INSTRUCTION}.
Maintain the same format as the input: one row per line, cells separated by spaces, and the total number of rows and columns unchanged.
Return only the completed grid without any additional explanation."""  # noqa: E501

# Fill instruction used when the item's subject contains "easy": binary cells.
EASY_FILL_INSTRUCTION = "'V' cells with either '1' (solid) or '0' (empty)"

# Fill instruction used for every other subject (the "*_hard" ones in
# SUBJECTS): one-decimal values in [0, 1].
HARD_FILL_INSTRUCTION = (
    "'V' cells with a floating point number between 0 and 1, with one decimal place (e.g., 0.0, 0.1, 0.2, ..., 1.0)"
)



# [docs]
class SPHYR(BaseTask[str]):
    """SPhyR dataset: https://huggingface.co/datasets/philippds/SPhyR

    Zero-shot completion task. Each item provides an ``input_grid`` that is
    rendered into the prompt, a ``ground_truth`` grid rendered the same way as
    the expected completion, and a ``subject`` whose name selects the easy or
    hard fill instruction. ``METRICS`` lists ``GridDifference`` as the only
    metric.
    """

    NAME = "SPHYR"
    DATASET_PATH = "philippds/SPhyR"
    SAMPLE_SPLIT = "test"
    # No few-shot split: few-shot prompting is rejected in __init__.
    FEWSHOT_SPLIT = ""
    RESPONSE_TYPE = ResponseType.COMPLETION
    METRICS = [GridDifference]
    SUBJECTS = SUBJECTS
    PERTURBATION_UNMODIFIABLE_WORDS = None
    LANGUAGE = Language.ENG

    def __init__(self, num_fewshot: int = 0) -> None:
        """Initialise the task; only ``num_fewshot == 0`` is accepted.

        Args:
            num_fewshot: Number of few-shot examples; must be 0.

        Raises:
            AssertionError: If ``num_fewshot`` is not 0.
        """
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; exception type and message are the
        # same as before.
        if num_fewshot != 0:
            raise AssertionError("Fewshot is not supported for SPHYR")
        super().__init__(num_fewshot)

    @staticmethod
    def _fill_instruction_for(item: dict[str, Any]) -> str:
        """Return the fill instruction matching the item's subject.

        Subjects whose name contains ``"easy"`` get ``EASY_FILL_INSTRUCTION``;
        every other subject gets ``HARD_FILL_INSTRUCTION``. Shared by the
        system prompt and the instruction text so both always agree.
        """
        return EASY_FILL_INSTRUCTION if "easy" in item["subject"] else HARD_FILL_INSTRUCTION

    def _grid_to_str(self, grid: list[list[str]]) -> str:
        """Render a grid as text: cells joined by spaces, rows by newlines.

        Each cell is passed through ``str()`` before joining.
        """
        return "\n".join(" ".join(str(cell) for cell in row) for row in grid)

    def _get_system_prompt_text(self, item: dict[str, Any]) -> str | None:
        """Return ``SYSTEM_PROMPT`` with the item's fill instruction inserted."""
        return SYSTEM_PROMPT.format(FILL_INSTRUCTION=self._fill_instruction_for(item))

    def _get_instruction_text(self, item: dict[str, Any]) -> str:
        """Return ``PROMPT_TEMPLATE`` filled with the item's grid and fill instruction."""
        grid = self._grid_to_str(item["input_grid"])
        return PROMPT_TEMPLATE.format(GRID=grid, FILL_INSTRUCTION=self._fill_instruction_for(item))

    def _get_ground_truth(self, item: dict[str, Any]) -> str | None:
        """Return the item's ``ground_truth`` grid in the same text format as the prompt grid."""
        return self._grid_to_str(item["ground_truth"])