Source code for eval_framework.metrics.llm.utils
"""Utility functions for LLM-based metrics."""
def order_answers_for_comparison(candidate: str, reference: str, swap: bool) -> tuple[str, str]:
    """Order candidate and reference answers for A/B comparison.

    This function is used to mitigate position bias in LLM-as-judge evaluations
    by optionally swapping the order in which answers are presented.

    Args:
        candidate: The candidate completion to evaluate.
        reference: The reference/baseline completion.
        swap: If True, swap the order (reference becomes A, candidate becomes B).

    Returns:
        Tuple of (answer_a, answer_b) in the correct order.
    """
    if swap:
        return reference, candidate
    return candidate, reference
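

A minimal usage sketch of the bias-mitigation pattern this helper supports: judge each pair in both orders and map the positional verdict back to the candidate before aggregating. The `judge` callable below is a hypothetical stand-in (not part of this module); a real LLM-as-judge call would replace it.

# Hypothetical judge stub: returns "A" or "B" for the preferred answer.
# Replace with an actual LLM-as-judge call in practice.
def judge(answer_a: str, answer_b: str) -> str:
    return "A" if len(answer_a) >= len(answer_b) else "B"

candidate = "Paris is the capital of France."
reference = "The capital of France is Paris."

wins = 0
for swap in (False, True):
    answer_a, answer_b = order_answers_for_comparison(candidate, reference, swap)
    verdict = judge(answer_a, answer_b)
    # Map the positional verdict back to the candidate: when swapped,
    # the candidate was presented as answer B.
    if verdict == ("B" if swap else "A"):
        wins += 1

print(f"Candidate preferred in {wins}/2 orderings")

Averaging over both orderings cancels any systematic preference the judge has for position A or position B, which is the position bias the docstring refers to.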