Skip to content

EvaluationCache

gepa.core.state.EvaluationCache(_cache: dict[CacheKey, CachedEvaluation[RolloutOutput]] = dict()) dataclass

Bases: Generic[RolloutOutput, DataId]

Cache for storing evaluation results of (candidate, example) pairs.

Functions

get(candidate: dict[str, str], example_id: DataId) -> CachedEvaluation[RolloutOutput] | None

Retrieve cached evaluation result if it exists.

Source code in gepa/core/state.py
def get(self, candidate: dict[str, str], example_id: DataId) -> CachedEvaluation[RolloutOutput] | None:
    """Return the cached evaluation for (candidate, example_id), or None on a miss."""
    # The cache is keyed by (candidate hash, example id) rather than the raw
    # candidate dict, since dicts are unhashable.
    cache_key = (_candidate_hash(candidate), example_id)
    return self._cache.get(cache_key)

put(candidate: dict[str, str], example_id: DataId, output: RolloutOutput, score: float, objective_scores: ObjectiveScores | None = None) -> None

Store an evaluation result in the cache.

Source code in gepa/core/state.py
def put(
    self,
    candidate: dict[str, str],
    example_id: DataId,
    output: RolloutOutput,
    score: float,
    objective_scores: ObjectiveScores | None = None,
) -> None:
    """Store an evaluation result in the cache.

    The entry is keyed by (candidate hash, example id); an existing entry
    for the same pair is overwritten.
    """
    entry = CachedEvaluation(output, score, objective_scores)
    cache_key = (_candidate_hash(candidate), example_id)
    self._cache[cache_key] = entry

get_batch(candidate: dict[str, str], example_ids: list[DataId]) -> tuple[dict[DataId, CachedEvaluation[RolloutOutput]], list[DataId]]

Look up cached results for a batch. Returns (cached_results, uncached_ids).

Source code in gepa/core/state.py
def get_batch(
    self, candidate: dict[str, str], example_ids: list[DataId]
) -> tuple[dict[DataId, CachedEvaluation[RolloutOutput]], list[DataId]]:
    """Look up cached results for a batch. Returns (cached_results, uncached_ids).

    The returned dict maps each cache-hit example id to its stored
    CachedEvaluation; uncached_ids preserves the input order of the misses.
    """
    # Hash the candidate once for the whole batch instead of once per lookup.
    h = _candidate_hash(candidate)
    cached: dict[DataId, CachedEvaluation[RolloutOutput]] = {}
    uncached: list[DataId] = []
    for eid in example_ids:
        entry = self._cache.get((h, eid))
        # Explicit None check: the previous walrus-truthiness test would have
        # treated a present-but-falsy cache entry as a miss.
        if entry is not None:
            cached[eid] = entry
        else:
            uncached.append(eid)
    return cached, uncached

put_batch(candidate: dict[str, str], example_ids: list[DataId], outputs: list[RolloutOutput], scores: list[float], objective_scores_list: Sequence[ObjectiveScores] | None = None) -> None

Store evaluation results for a batch of examples.

Source code in gepa/core/state.py
def put_batch(
    self,
    candidate: dict[str, str],
    example_ids: list[DataId],
    outputs: list[RolloutOutput],
    scores: list[float],
    objective_scores_list: Sequence[ObjectiveScores] | None = None,
) -> None:
    """Store evaluation results for a batch of examples.

    outputs and scores are indexed positionally against example_ids;
    objective_scores_list, when provided and non-empty, is indexed the same way.
    """
    candidate_key = _candidate_hash(candidate)
    # Truthiness (not an `is None` check) mirrors how callers may pass an
    # empty sequence to mean "no objective scores".
    has_objectives = bool(objective_scores_list)
    for idx, example_id in enumerate(example_ids):
        objective = objective_scores_list[idx] if has_objectives else None
        self._cache[(candidate_key, example_id)] = CachedEvaluation(
            outputs[idx], scores[idx], objective
        )

evaluate_with_cache_full(candidate: dict[str, str], example_ids: list[DataId], fetcher: Callable[[list[DataId]], Any], evaluator: Callable[[Any, dict[str, str]], tuple[Any, list[float], Sequence[ObjectiveScores] | None]]) -> tuple[dict[DataId, RolloutOutput], dict[DataId, float], dict[DataId, ObjectiveScores] | None, int]

Evaluate using cache, returning full results.

Returns (outputs_by_id, scores_by_id, objective_scores_by_id, num_actual_evals).

Source code in gepa/core/state.py
def evaluate_with_cache_full(
    self,
    candidate: dict[str, str],
    example_ids: list[DataId],
    fetcher: Callable[[list[DataId]], Any],
    evaluator: Callable[[Any, dict[str, str]], tuple[Any, list[float], Sequence[ObjectiveScores] | None]],
) -> tuple[dict[DataId, RolloutOutput], dict[DataId, float], dict[DataId, ObjectiveScores] | None, int]:
    """
    Evaluate using cache, returning full results.

    Cache hits are served directly; misses are fetched via `fetcher`,
    scored via `evaluator`, and written back to the cache.

    Returns (outputs_by_id, scores_by_id, objective_scores_by_id, num_actual_evals).
    """
    hits, misses = self.get_batch(candidate, example_ids)

    outputs_by_id: dict[DataId, RolloutOutput] = {}
    scores_by_id: dict[DataId, float] = {}
    # Stays None unless at least one entry (cached or fresh) carries objective scores.
    objective_by_id: dict[DataId, ObjectiveScores] | None = None

    # Fill from cache hits first.
    for eid, entry in hits.items():
        outputs_by_id[eid] = entry.output
        scores_by_id[eid] = entry.score
        if entry.objective_scores is not None:
            if objective_by_id is None:
                objective_by_id = {}
            objective_by_id[eid] = entry.objective_scores

    # Run the evaluator only on the misses.
    if misses:
        fresh_outputs, fresh_scores, fresh_objectives = evaluator(fetcher(misses), candidate)
        for idx, eid in enumerate(misses):
            outputs_by_id[eid] = fresh_outputs[idx]
            scores_by_id[eid] = fresh_scores[idx]
            if fresh_objectives is not None:
                if objective_by_id is None:
                    objective_by_id = {}
                objective_by_id[eid] = fresh_objectives[idx]
        # Persist fresh results so repeat evaluations of this candidate hit the cache.
        self.put_batch(candidate, misses, fresh_outputs, fresh_scores, fresh_objectives)

    return outputs_by_id, scores_by_id, objective_by_id, len(misses)