# Source code for eval_framework.metrics.completion.struct_eval_metrics

import csv
import io
import json
import tomllib
from typing import Any

import xmltodict
import yaml
from lxml import etree

from eval_framework.metrics.base import BaseMetric, MetricResult
from eval_framework.shared.types import BaseMetricContext, Completion, extract_context_metric


class StructMetricContext(BaseMetricContext):
    """Context for StructMetric: the expected output format and the required paths."""

    # Name of the structured format the completion must parse as
    # (e.g. "json", "yaml", "toml", "xml", "csv"); matched case-insensitively.
    output_type: str
    # Dot-notation paths that must all exist in the parsed output.
    paths: list[str]
[docs] class StructMetric(BaseMetric[Completion]): NAME = "StructMetric"
[docs] def calculate(self, response: Completion) -> list[MetricResult]: if response.error is not None: return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)] context = extract_context_metric(response, StructMetricContext) output_type = context.output_type try: match output_type.lower(): case "json": result = json.loads(response.completion) case "yaml": result = list(yaml.safe_load_all(response.completion)) if isinstance(result, list) and len(result) == 1: result = result[0] else: raise yaml.YAMLError("Multiple documents found in YAML") case "toml": result = tomllib.loads(response.completion) case "xml": result = xmltodict.parse(response.completion) case "csv": csv_output = csv.DictReader(io.StringIO(response.completion)) # Check for unclosed quotes if response.completion.count('"') % 2 != 0: raise csv.Error("Unclosed quote in CSV") if not csv_output.fieldnames: raise csv.Error("CSV has no headers") result = {"csv_headers": csv_output.fieldnames, "csv_rows": list(csv_output)} case _: raise ValueError(f"Unsupported format: {output_type}") valid_format = 1.0 except (json.JSONDecodeError, yaml.YAMLError, tomllib.TOMLDecodeError, csv.Error, Exception): valid_format = 0.0 has_required_fields = 0.0 if valid_format == 1: # assert "paths" in response.eval_kwargs, "Paths must be provided in eval_kwargs" assert context.paths is not None, "Paths must be provided in context" paths = context.paths assert isinstance(paths, list), "Paths must be a list of strings" valid_paths = 0 for path in paths: if path_exists(result, path): valid_paths += 1 has_required_fields = valid_paths / len(paths) if paths else 1.0 return [ MetricResult( metric_name=f"{self.NAME}/valid_format", value=valid_format, higher_is_better=True, ), MetricResult( metric_name=f"{self.NAME}/has_keywords", value=has_required_fields, higher_is_better=True, ), ]
def is_valid_html(html: str) -> bool:
    """Return True iff *html* parses under lxml's strict (non-recovering) HTML parser
    and the parser recorded no errors."""
    strict_parser = etree.HTMLParser(recover=False)
    try:
        etree.fromstring(html.encode("utf-8"), strict_parser)
    except etree.XMLSyntaxError:
        return False
    # Even without an exception, the parser may have logged recoverable errors.
    return not strict_parser.error_log
class RenderableStructMetricContext(BaseMetricContext):
    """Context for RenderableStructMetric: target renderable format plus expected keywords."""

    # Renderable format name; currently only "html" is supported (case-insensitive).
    output_type: str
    # Substrings expected (case-insensitively) somewhere in the completion.
    keywords: list[str]
class RenderableStructMetric(StructMetric):
    """Scores renderable markup (currently HTML) for validity and keyword coverage."""

    NAME = "RenderableStructMetric"

    def calculate(self, response: Completion) -> list[MetricResult]:
        """Score *response* for HTML validity and case-insensitive keyword presence."""
        if response.error is not None:
            return [MetricResult(metric_name=self.NAME, value=None, higher_is_better=True, error=response.error)]

        context = extract_context_metric(response, RenderableStructMetricContext)
        output_type = context.output_type

        if output_type.lower() == "html":
            valid_format = float(is_valid_html(response.completion))
        else:
            raise ValueError(f"Unsupported format for RenderableStructMetric: {output_type}")

        assert context.keywords is not None, "Keywords must be provided in context"
        keywords = context.keywords
        assert isinstance(keywords, list), "Keywords must be a list of strings"

        # No keywords to check vacuously passes.
        if keywords:
            haystack = response.completion.lower()
            hits = sum(1 for keyword in keywords if keyword.lower() in haystack)
            has_keywords = hits / len(keywords)
        else:
            has_keywords = 1.0

        return [
            MetricResult(
                metric_name=f"{self.NAME}/valid_format",
                value=valid_format,
                higher_is_better=True,
            ),
            MetricResult(
                metric_name=f"{self.NAME}/has_keywords",
                value=has_keywords,
                higher_is_better=True,
            ),
        ]
# adapted from: https://github.com/TIGER-AI-Lab/StructEval/blob/main/structeval/eval_engine/eval_utils.py
def tokenize_path(path: str) -> list[str]:
    """Split a dot-notation path into tokens, honoring back-tick quoting and "[n]" indices.

    Args:
        path: Path string, e.g. "users.0.name", "users[0].name", or "a.`b.c`.d".

    Returns:
        List of path tokens; bracketed indices become their own tokens (e.g. "[0]").

    Raises:
        ValueError: if an unquoted "[" has no matching "]".
    """
    # CSV header paths ("csv::<header>") are opaque: return them as one token.
    if path.startswith("csv::"):
        return [path]

    tokens: list[str] = []
    current = ""
    quoted = False  # True while between a pair of back-ticks
    pos = 0
    length = len(path)
    while pos < length:
        char = path[pos]
        if char == "`":
            # Back-ticks only toggle quoting; they never appear in tokens.
            quoted = not quoted
            pos += 1
        elif char == "." and not quoted:
            # Unquoted dot ends the current token (empty segments are dropped).
            if current:
                tokens.append(current)
                current = ""
            pos += 1
        elif char == "[" and not quoted:
            # Flush the pending token, then capture "[...]" verbatim as its own token.
            if current:
                tokens.append(current)
                current = ""
            close = path.find("]", pos)
            if close == -1:
                raise ValueError(f"Unclosed '[' in path: {path}")
            tokens.append(path[pos : close + 1])
            pos = close + 1
        else:
            current += char
            pos += 1

    if current:
        tokens.append(current)
    return tokens
# adapted from: https://github.com/TIGER-AI-Lab/StructEval/blob/main/structeval/eval_engine/eval_utils.py
def path_exists(data: Any, path: str) -> bool:
    """Return True if *path* (dot notation) resolves to something inside *data*.

    Args:
        data: Parsed structured data (nested dicts/lists, or the CSV dict produced
            by StructMetric with "csv_headers"/"csv_rows").
        path: Dot-notation path; supports "[n]" indices, "*" list wildcards,
            "@attr" XML-attribute fallback, and terminal "csv::<header>" tokens.
    """

    def _descend(node: Any, remaining: list[str]) -> bool:
        # All tokens consumed: the path resolved.
        if not remaining:
            return True
        token = remaining[0]
        tail = remaining[1:]

        # CSV header rule (root level only); a header token must be terminal.
        if isinstance(node, dict) and "csv_headers" in node and token.startswith("csv::"):
            return not tail and token[5:] in node["csv_headers"]

        # "*" matches any element of a list.
        if token == "*":
            return isinstance(node, list) and any(_descend(element, tail) for element in node)

        # Fixed index "[n]" into a list.
        if token.startswith("[") and token.endswith("]"):
            try:
                index = int(token[1:-1])
            except ValueError:
                return False
            return isinstance(node, list) and 0 <= index < len(node) and _descend(node[index], tail)

        # Mapping keys (JSON/YAML/TOML/XML); "@attr" falls back to the bare "attr" key.
        if isinstance(node, dict):
            if token in node:
                return _descend(node[token], tail)
            if token.startswith("@") and token[1:] in node:
                return _descend(node[token[1:]], tail)

        return False

    return _descend(data, tokenize_path(path))