def optimize_anything(
    seed_candidate: str | Candidate | None = None,
    *,
    evaluator: Callable[..., Any],
    dataset: list[DataInst] | None = None,
    valset: list[DataInst] | None = None,
    objective: str | None = None,
    background: str | None = None,
    config: GEPAConfig | None = None,
) -> GEPAResult:
    """Optimize any text artifact using LLM-guided search.

    This is the main entry point for GEPA. You declare the **what** — your
    artifact, your evaluator, and any domain knowledge — and
    ``optimize_anything`` handles the **how**: prompt construction, reflection,
    candidate selection, and Pareto-efficient search.

    **Three optimization modes** (determined by ``dataset`` / ``valset``):

    1. **Single-Task Search** (``dataset=None, valset=None``):
       Solve one hard problem. The candidate *is* the solution.
       Evaluator called without ``example``.
       *E.g. circle packing, blackbox mathematical optimization.*

    2. **Multi-Task Search** (``dataset=<list>, valset=None``):
       Solve a batch of related problems with cross-task transfer.
       Insights from solving one help solve the others.
       ``valset`` defaults to ``dataset``.
       *E.g. CUDA kernel generation, multi-aspect SVG optimization.*

    3. **Generalization** (``dataset=<list>, valset=<list>``):
       Build a skill that transfers to unseen problems.
       *E.g. prompt optimization for AIME math, agent architecture evolution
       for ARC-AGI, cloud scheduling policy discovery.*

    Args:
        seed_candidate: Starting point for optimization.

            - ``str`` — single text parameter (evaluator receives ``str``).
            - ``dict[str, str]`` — named parameters (evaluator receives the dict).
            - ``None`` — **seedless mode**: the reflection LLM generates the
              initial candidate from ``objective`` (and optionally ``background``
              / ``dataset``). Requires ``objective``. Useful for creative or
              exploratory tasks where you know *what good looks like* but not
              where to begin.
        evaluator: Scoring function. Returns ``(score, side_info)`` or ``score``.
            See :class:`Evaluator`. Diagnostic output via ``oa.log()`` is
            automatically captured as Actionable Side Information (ASI).
            For richer diagnostics, return a ``(score, dict)`` tuple with
            structured feedback, error messages, or even rendered images
            (via :class:`~gepa.Image`).
        dataset: Examples for multi-task or generalization modes.
            ``None`` = single-task search mode.
        valset: Held-out validation set for generalization mode.
            ``None`` = defaults to ``dataset`` (multi-task search).
        objective: Natural-language goal for the reflection LLM (e.g.
            ``"Generate prompts that solve competition math problems."``).
        background: Domain knowledge, constraints, or strategies for the
            reflection LLM.
        config: Full configuration. See :class:`GEPAConfig`.

            NOTE(review): this object is mutated in place below (defaults are
            filled in, strings converted to callables). Reusing the same
            ``GEPAConfig`` instance across calls therefore carries state over.

    Returns:
        :class:`~gepa.core.result.GEPAResult` — access ``result.best_candidate``
        for the optimized parameter(s) and the full optimization history.

    Raises:
        ValueError: If no stopping condition is configured, if required
            settings are missing for the selected mode (e.g. ``objective``
            in seedless mode), or if mutually exclusive options are combined.
        TypeError: If ``candidate_selection_strategy`` is neither a supported
            string nor a :class:`CandidateSelector` instance.

    Examples:
        Single-task search (circle packing)::

            import gepa.optimize_anything as oa

            def evaluate(candidate: str) -> float:
                result = run_code(candidate)
                oa.log(f"Score: {result.score}, Overlaps: {result.overlaps}")
                return result.score

            result = optimize_anything(
                seed_candidate="def pack_circles(): ...",
                evaluator=evaluate,
                objective="Maximize the sum of radii for n circles in a unit square.",
                config=GEPAConfig(engine=EngineConfig(max_metric_calls=500)),
            )

        Multi-task search (CUDA kernels)::

            result = optimize_anything(
                seed_candidate={"prompt": "Write an optimized CUDA kernel."},
                evaluator=kernel_evaluator,
                dataset=kernel_problems,  # batch of related problems
                objective="Generate prompts that produce fast, correct CUDA kernels.",
                config=GEPAConfig(engine=EngineConfig(max_metric_calls=300)),
            )

        Generalization (prompt optimization for math)::

            result = optimize_anything(
                seed_candidate={"prompt": "Solve this math problem step by step:"},
                evaluator=math_evaluator,
                dataset=train_problems,  # train on these
                valset=val_problems,     # must generalize to these
                objective="Generate system prompts that improve math reasoning.",
                config=GEPAConfig(engine=EngineConfig(max_metric_calls=200)),
            )

        Seedless mode (no starting artifact)::

            result = optimize_anything(
                seed_candidate=None,  # LLM writes the first draft
                evaluator=evaluate_3d_render,
                dataset=visual_aspects,
                objective="Optimize a Python program to generate a 3D unicorn.",
                background="Use build123d for CSG geometry, export to STL, render with pyrender.",
            )
    """
    # Use default config if not provided.
    if config is None:
        config = GEPAConfig()

    # Detect seed generation mode: when seed_candidate is None, the LLM
    # will generate the initial candidate from the objective.
    needs_seed_generation = False
    if seed_candidate is None:
        needs_seed_generation = True
        # Seedless candidates are generated as a single string parameter.
        str_candidate_mode = True
        if not objective or not objective.strip():
            raise ValueError(
                "'objective' is required when seed_candidate is None. "
                "The reflection LLM needs the objective to generate an initial candidate."
            )
        seed_candidate = {_STR_CANDIDATE_KEY: ""}  # placeholder until LLM generates it
    else:
        # Normalize seed_candidate: str -> {_STR_CANDIDATE_KEY: str}
        str_candidate_mode = isinstance(seed_candidate, str)
        if isinstance(seed_candidate, str):
            seed_candidate = {_STR_CANDIDATE_KEY: seed_candidate}

    # Detect single-instance mode: when both dataset=None and valset=None.
    single_instance_mode = dataset is None and valset is None

    # Set reflection_minibatch_size default based on mode (if not explicitly set).
    if config.reflection.reflection_minibatch_size is None:
        config.reflection.reflection_minibatch_size = 1 if single_instance_mode else 3

    # Handle single-instance mode: when both dataset=None and valset=None, create a
    # dataset with a single sentinel element. The evaluator will be called
    # without the example parameter.
    if single_instance_mode:
        effective_dataset: list[DataInst] = [_SINGLE_INSTANCE_SENTINEL]  # type: ignore[list-item]
    else:
        # NOTE(review): dataset=None with valset set falls through to a
        # one-element [None] train set — confirm this is the intended contract.
        effective_dataset = dataset if dataset is not None else [None]  # type: ignore[list-item]

    # Wrap the evaluator to handle signature normalization, log/stdout capture, etc.
    wrapped_evaluator = EvaluatorWrapper(
        evaluator,
        single_instance_mode,
        capture_stdio=config.engine.capture_stdio,
        str_candidate_mode=str_candidate_mode,
        raise_on_exception=config.engine.raise_on_exception,
    )

    # Resolve cache mode: cache_evaluation controls on/off,
    # cache_evaluation_storage controls where ("auto" -> disk iff run_dir set).
    if not config.engine.cache_evaluation:
        resolved_cache_mode = "off"
        if config.engine.cache_evaluation_storage != "auto":
            warnings.warn(
                f"cache_evaluation_storage={config.engine.cache_evaluation_storage!r} is set but "
                f"cache_evaluation=False, so caching is disabled. Set cache_evaluation=True to "
                f"enable caching with the specified storage mode.",
                stacklevel=2,
            )
    elif config.engine.cache_evaluation_storage == "auto":
        resolved_cache_mode = "disk" if config.engine.run_dir else "memory"
    else:
        resolved_cache_mode = config.engine.cache_evaluation_storage

    # Validate disk mode requires run_dir.
    if resolved_cache_mode == "disk" and not config.engine.run_dir:
        raise ValueError("cache_evaluation_storage='disk' requires run_dir in EngineConfig")

    # Configure cloudpickle for code execution subprocess serialization.
    from gepa.utils.code_execution import set_use_cloudpickle

    set_use_cloudpickle(config.engine.use_cloudpickle)

    active_adapter: GEPAAdapter = OptimizeAnythingAdapter(
        evaluator=wrapped_evaluator,
        parallel=config.engine.parallel,
        max_workers=config.engine.max_workers,
        refiner_config=config.refiner,
        best_example_evals_k=config.engine.best_example_evals_k,
        objective=objective,
        background=background,
        cache_mode=resolved_cache_mode,
        cache_dir=config.engine.run_dir,
    )

    # Normalize datasets to DataLoader instances; without an explicit valset,
    # validation reuses the training loader (multi-task search mode).
    train_loader = ensure_loader(effective_dataset)
    val_loader = ensure_loader(valset) if valset is not None else train_loader

    # --- 1. Build stoppers from the EngineConfig and root config ---
    stop_callbacks_list: list[StopperProtocol] = []

    # Add custom stop callbacks if provided (single stopper or sequence).
    if config.stop_callbacks is not None:
        if isinstance(config.stop_callbacks, Sequence):
            stop_callbacks_list.extend(config.stop_callbacks)
        else:
            stop_callbacks_list.append(config.stop_callbacks)

    # Add file stopper if run_dir is provided (touch <run_dir>/gepa.stop to halt).
    if config.engine.run_dir is not None:
        stop_file_path = os.path.join(config.engine.run_dir, "gepa.stop")
        file_stopper = FileStopper(stop_file_path)
        stop_callbacks_list.append(file_stopper)

    # Add max_metric_calls stopper if provided.
    if config.engine.max_metric_calls is not None:
        from gepa.utils import MaxMetricCallsStopper

        max_calls_stopper = MaxMetricCallsStopper(config.engine.max_metric_calls)
        stop_callbacks_list.append(max_calls_stopper)

    # Add max_candidate_proposals stopper if provided.
    if config.engine.max_candidate_proposals is not None:
        from gepa.utils import MaxCandidateProposalsStopper

        proposals_stopper = MaxCandidateProposalsStopper(config.engine.max_candidate_proposals)
        stop_callbacks_list.append(proposals_stopper)

    # At least one stopping condition is required, else the run never terminates.
    if not stop_callbacks_list:
        raise ValueError(
            "At least one stopping condition must be provided via config.engine.max_metric_calls or config.stop_callbacks."
        )

    # Create composite stopper if multiple stoppers, or use single stopper.
    stop_callback: StopperProtocol
    if len(stop_callbacks_list) == 1:
        stop_callback = stop_callbacks_list[0]
    else:
        from gepa.utils import CompositeStopper

        stop_callback = CompositeStopper(*stop_callbacks_list)

    # --- 2. Validate and setup reflection LM ---
    if needs_seed_generation and config.reflection.reflection_lm is None:
        raise ValueError(
            "reflection_lm is required when seed_candidate is None. "
            "Set config.reflection.reflection_lm to a model name or callable."
        )
    if not hasattr(active_adapter, "propose_new_texts"):
        # Raise (not assert) so the check survives `python -O`, consistent
        # with the other ValueError-based validation in this function.
        if config.reflection.reflection_lm is None:
            raise ValueError(
                f"reflection_lm was not provided. The adapter '{active_adapter!s}' does not provide a propose_new_texts method, "
                + "and hence, GEPA will use the default proposer, which requires a reflection_lm to be specified."
            )

    # Default refiner_lm to reflection_lm name BEFORE converting reflection_lm to callable.
    if config.refiner is not None and config.refiner.refiner_lm is None:
        config.refiner.refiner_lm = config.reflection.reflection_lm

    # Convert reflection_lm string to callable.
    if isinstance(config.reflection.reflection_lm, str):
        config.reflection.reflection_lm = make_litellm_lm(config.reflection.reflection_lm)

    # Convert refiner_lm string to LiteLLM callable (if refiner is enabled).
    if config.refiner is not None:
        if isinstance(config.refiner.refiner_lm, str):
            config.refiner.refiner_lm = make_litellm_lm(config.refiner.refiner_lm)

    # Generate seed candidate via LLM if seed_candidate was None.
    if needs_seed_generation:
        # Internal invariants: validated above and converted to a callable above.
        assert config.reflection.reflection_lm is not None and not isinstance(config.reflection.reflection_lm, str)
        assert objective is not None  # validated earlier in needs_seed_generation block
        seed_candidate = _generate_seed_candidate(
            lm=config.reflection.reflection_lm,
            objective=objective,
            background=background,
            dataset=dataset,
            logger=config.tracking.logger or StdOutLogger(),
        )

    # Auto-inject refiner_prompt into seed_candidate if refiner is enabled.
    if config.refiner is not None:
        formatted_refiner_prompt = DEFAULT_REFINER_PROMPT.format(
            objective=objective or "Maximize the score",
            background=background or "No additional background provided.",
        )
        if "refiner_prompt" not in seed_candidate:
            seed_candidate["refiner_prompt"] = formatted_refiner_prompt
        # If user provides their own refiner_prompt, use it (allows custom refiner prompts).

    # Setup default logger if not provided: file logger under run_dir, else stdout.
    if config.tracking.logger is None:
        if config.engine.run_dir is not None:
            os.makedirs(config.engine.run_dir, exist_ok=True)
            config.tracking.logger = Logger(os.path.join(config.engine.run_dir, "run_log.txt"))
        else:
            config.tracking.logger = StdOutLogger()

    # --- 3. Setup random number generator ---
    rng = random.Random(config.engine.seed)

    # --- 4. Build candidate selector from EngineConfig ---
    candidate_selector: CandidateSelector
    if isinstance(config.engine.candidate_selection_strategy, str):
        # Lazy factories so only the selected strategy is instantiated.
        factories = {
            "pareto": lambda: ParetoCandidateSelector(rng=rng),
            "current_best": lambda: CurrentBestCandidateSelector(),
            "epsilon_greedy": lambda: EpsilonGreedyCandidateSelector(epsilon=0.1, rng=rng),
            "top_k_pareto": lambda: TopKParetoCandidateSelector(k=5, rng=rng),
        }
        try:
            candidate_selector = factories[config.engine.candidate_selection_strategy]()
        except KeyError as exc:
            raise ValueError(
                f"Unknown candidate_selector strategy: {config.engine.candidate_selection_strategy}. "
                "Supported strategies: 'pareto', 'current_best', 'epsilon_greedy', 'top_k_pareto'"
            ) from exc
    elif isinstance(config.engine.candidate_selection_strategy, CandidateSelector):
        candidate_selector = config.engine.candidate_selection_strategy
    else:
        raise TypeError(
            "candidate_selection_strategy must be a supported string strategy or an instance of CandidateSelector."
        )

    # --- 5. Build evaluation policy from EngineConfig ---
    if config.engine.val_evaluation_policy is None or config.engine.val_evaluation_policy == "full_eval":
        config.engine.val_evaluation_policy = FullEvaluationPolicy()
    elif not isinstance(config.engine.val_evaluation_policy, EvaluationPolicy):
        raise ValueError(
            f"val_evaluation_policy should be 'full_eval' or an EvaluationPolicy instance, but got {type(config.engine.val_evaluation_policy)}"
        )

    # --- 6. Build module selector from ReflectionConfig ---
    if isinstance(config.reflection.module_selector, str):
        module_selector_cls = {
            "round_robin": RoundRobinReflectionComponentSelector,
            "all": AllReflectionComponentSelector,
        }.get(config.reflection.module_selector)
        # Raise (not assert) so the check survives `python -O`, and to match
        # the ValueError raised for unknown candidate-selector strategies.
        if module_selector_cls is None:
            raise ValueError(
                f"Unknown module_selector strategy: {config.reflection.module_selector}. "
                "Supported strategies: 'round_robin', 'all'"
            )
        module_selector_instance: ReflectionComponentSelector = module_selector_cls()
    else:
        module_selector_instance = config.reflection.module_selector

    # --- 7. Build batch sampler from ReflectionConfig ---
    if config.reflection.batch_sampler == "epoch_shuffled":
        config.reflection.batch_sampler = EpochShuffledBatchSampler(
            minibatch_size=config.reflection.reflection_minibatch_size, rng=rng
        )

    # --- 8. Build experiment tracker from TrackingConfig ---
    experiment_tracker = create_experiment_tracker(
        use_wandb=config.tracking.use_wandb,
        wandb_api_key=config.tracking.wandb_api_key,
        wandb_init_kwargs=config.tracking.wandb_init_kwargs,
        wandb_attach_existing=config.tracking.wandb_attach_existing,
        wandb_step_metric=config.tracking.wandb_step_metric,
        use_mlflow=config.tracking.use_mlflow,
        mlflow_tracking_uri=config.tracking.mlflow_tracking_uri,
        mlflow_experiment_name=config.tracking.mlflow_experiment_name,
        mlflow_attach_existing=config.tracking.mlflow_attach_existing,
        key_prefix=config.tracking.key_prefix,
    )

    # --- 9. Build reflection prompt template from objective/background if provided ---
    # Check for conflicting configuration: user cannot provide both objective/background
    # AND a custom reflection_prompt_template (these are mutually exclusive approaches).
    user_provided_custom_template = (
        config.reflection.reflection_prompt_template is not None
        and config.reflection.reflection_prompt_template != optimize_anything_reflection_prompt_template
    )
    # Treat empty strings as "not provided" - only non-empty strings count.
    user_provided_objective_or_background = bool(objective) or bool(background)
    if user_provided_custom_template and user_provided_objective_or_background:
        raise ValueError(
            "Cannot specify both 'objective'/'background' parameters and a custom "
            "'config.reflection.reflection_prompt_template'. These are mutually exclusive options. "
            "Either use objective/background to auto-generate a reflection prompt, or provide "
            "your own custom template via config.reflection.reflection_prompt_template."
        )

    # If objective or background are provided, build a custom reflection prompt template
    # with those values filled in, creating a template with <curr_param> and <side_info> placeholders.
    if user_provided_objective_or_background:
        config.reflection.reflection_prompt_template = _build_reflection_prompt_template(
            objective=objective, background=background
        )

    # --- 10. Validate reflection prompt template ---
    if config.reflection.reflection_prompt_template is not None:
        # Raise (not assert) so the conflict check survives `python -O`.
        if getattr(active_adapter, "propose_new_texts", None) is not None:
            raise ValueError(
                f"Adapter {active_adapter!s} provides its own propose_new_texts method; "
                "reflection_prompt_template will be ignored. Set reflection_prompt_template to None."
            )
        # Validate template(s) - can be a single string or dict of templates.
        from gepa.strategies.instruction_proposal import InstructionProposalSignature

        if isinstance(config.reflection.reflection_prompt_template, dict):
            for param_name, template in config.reflection.reflection_prompt_template.items():
                try:
                    InstructionProposalSignature.validate_prompt_template(template)
                except ValueError as e:
                    raise ValueError(f"Invalid reflection_prompt_template for parameter '{param_name}': {e}") from e
        else:
            InstructionProposalSignature.validate_prompt_template(config.reflection.reflection_prompt_template)

    # --- 11. Build reflective proposer from ReflectionConfig ---
    reflective_proposer = ReflectiveMutationProposer(
        logger=config.tracking.logger,
        trainset=train_loader,
        adapter=active_adapter,
        candidate_selector=candidate_selector,
        module_selector=module_selector_instance,
        batch_sampler=config.reflection.batch_sampler,
        perfect_score=config.reflection.perfect_score,
        skip_perfect_score=config.reflection.skip_perfect_score,
        experiment_tracker=experiment_tracker,
        reflection_lm=config.reflection.reflection_lm,
        reflection_prompt_template=config.reflection.reflection_prompt_template,
        custom_candidate_proposer=config.reflection.custom_candidate_proposer,
    )

    # Define evaluator function for merge proposer (no trace capture needed).
    def merge_evaluator(
        inputs: list[DataInst], prog: Candidate
    ) -> tuple[list[object], list[float], list[dict[str, float]] | None]:
        eval_out = active_adapter.evaluate(inputs, prog, capture_traces=False)
        return eval_out.outputs, eval_out.scores, eval_out.objective_scores

    # --- 12. Build merge proposer from MergeConfig (if provided) ---
    merge_proposer: MergeProposer | None = None
    if config.merge is not None:
        merge_proposer = MergeProposer(
            logger=config.tracking.logger,
            valset=val_loader,
            evaluator=merge_evaluator,
            use_merge=True,
            max_merge_invocations=config.merge.max_merge_invocations,
            rng=rng,
            val_overlap_floor=config.merge.merge_val_overlap_floor,
        )

    # --- 13. Create evaluation cache if enabled ---
    evaluation_cache: EvaluationCache[Any, Any] | None = None
    if config.engine.cache_evaluation:
        evaluation_cache = EvaluationCache[Any, Any]()

    # --- 14. Build the main engine from EngineConfig ---
    engine = GEPAEngine(
        adapter=active_adapter,
        run_dir=config.engine.run_dir,
        valset=val_loader,
        seed_candidate=seed_candidate,
        perfect_score=config.reflection.perfect_score,
        seed=config.engine.seed,
        reflective_proposer=reflective_proposer,
        merge_proposer=merge_proposer,
        frontier_type=config.engine.frontier_type,
        logger=config.tracking.logger,
        experiment_tracker=experiment_tracker,
        track_best_outputs=config.engine.track_best_outputs,
        display_progress_bar=config.engine.display_progress_bar,
        raise_on_exception=config.engine.raise_on_exception,
        stop_callback=stop_callback,
        val_evaluation_policy=config.engine.val_evaluation_policy,
        use_cloudpickle=config.engine.use_cloudpickle,
        evaluation_cache=evaluation_cache,
    )

    # --- 15. Run optimization ---
    # File-backed Logger is used as a context manager so the log file is
    # flushed/closed even if engine.run() raises.
    logger = config.tracking.logger
    with experiment_tracker:
        if isinstance(logger, Logger):
            with logger:
                state = engine.run()
        else:
            state = engine.run()
    return GEPAResult.from_state(
        state,
        run_dir=config.engine.run_dir,
        seed=config.engine.seed,
        str_candidate_key=_STR_CANDIDATE_KEY if str_candidate_mode else None,
    )