Source code for eval_framework.metrics.llm.utils
"""Utility functions for LLM-based metrics."""
def order_answers_for_comparison(candidate: str, reference: str, swap: bool) -> tuple[str, str]:
    """Order candidate and reference answers for A/B comparison.

    This function is used to mitigate position bias in LLM-as-judge evaluations
    by optionally swapping the order in which answers are presented.

    Args:
        candidate: The candidate completion to evaluate.
        reference: The reference/baseline completion.
        swap: If True, swap the order (reference becomes A, candidate becomes B).

    Returns:
        Tuple of (answer_a, answer_b) in the correct order.
    """
    if swap:
        return reference, candidate
    return candidate, reference
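

A minimal usage sketch of the bias-mitigation pattern this helper supports: judge each pair in both orders and map the positional verdict back to the candidate before aggregating. The `judge` callable below is a hypothetical stand-in (not part of this module); a real LLM-as-judge call would replace it.

# Hypothetical judge stub: returns "A" or "B" for the preferred answer.
# Replace with an actual LLM-as-judge call in practice.
def judge(answer_a: str, answer_b: str) -> str:
    return "A" if len(answer_a) >= len(answer_b) else "B"

candidate = "Paris is the capital of France."
reference = "The capital of France is Paris."

wins = 0
for swap in (False, True):
    answer_a, answer_b = order_answers_for_comparison(candidate, reference, swap)
    verdict = judge(answer_a, answer_b)
    # Map the positional verdict back to the candidate: when swapped,
    # the candidate was presented as answer B.
    if verdict == ("B" if swap else "A"):
        wins += 1

print(f"Candidate preferred in {wins}/2 orderings")

Averaging over both orderings cancels any systematic preference the judge has for position A or position B, which is the position bias the docstring refers to.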