MCPAdapter

gepa.adapters.mcp_adapter.mcp_adapter.MCPAdapter(tool_names: str | list[str], task_model: str | Callable, metric_fn: Callable[[MCPDataInst, str], float], server_params: StdioServerParameters | None = None, remote_url: str | None = None, remote_transport: str = 'sse', remote_headers: dict[str, str] | None = None, remote_timeout: float = 30, base_system_prompt: str = 'You are a helpful assistant with access to tools.', enable_two_pass: bool = True, failure_score: float = 0.0)

Bases: GEPAAdapter[MCPDataInst, MCPTrajectory, MCPOutput]

GEPA adapter for optimizing MCP tool usage.

This adapter enables optimization of:

- Tool descriptions (single or multiple tools)
- System prompts for tool usage guidance
- Tool selection logic

Features:

- Multi-tool support: optimize multiple tools simultaneously
- Two-pass workflow: tool call + answer generation
- Multiple transports: stdio (local), SSE, StreamableHTTP (remote)
- Reflective datasets: generate training data for refinement

Example (Local):

>>> from mcp import StdioServerParameters
>>> adapter = MCPAdapter(
...     tool_names=["read_file", "write_file"],
...     task_model="gpt-4o-mini",
...     metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
...     server_params=StdioServerParameters(
...         command="python",
...         args=["server.py"],
...     ),
... )

Example (Remote):

>>> adapter = MCPAdapter(
...     tool_names="search_web",
...     task_model="gpt-4o-mini",
...     metric_fn=accuracy_metric,
...     remote_url="https://mcp-server.com/sse",
...     remote_transport="sse",
... )
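
In a full run, the adapter is handed to GEPA's optimizer, which mutates the candidate texts and re-evaluates them through this adapter. The sketch below shows the intended wiring; it assumes the single-tool component keys ("tool_description", "system_prompt") used by make_reflective_dataset further down, dataset items with user_query/reference_answer fields, and the gepa.optimize argument names (seed_candidate, trainset, valset, adapter, reflection_lm, max_metric_calls). Verify these against the GEPA version you have installed.

import gepa
from mcp import StdioServerParameters

from gepa.adapters.mcp_adapter.mcp_adapter import MCPAdapter

# Assumed dataset shape: items expose "user_query" and "reference_answer".
trainset = [{"user_query": "What is in notes.txt?", "reference_answer": "hello world"}]
valset = [{"user_query": "What is in todo.txt?", "reference_answer": "buy milk"}]

adapter = MCPAdapter(
    tool_names="read_file",
    task_model="gpt-4o-mini",
    metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
    server_params=StdioServerParameters(command="python", args=["server.py"]),
)

# Seed texts for the components this adapter can optimize.
seed_candidate = {
    "tool_description": "Read a text file from disk and return its contents.",
    "system_prompt": "You are a helpful assistant with access to tools.",
}

result = gepa.optimize(
    seed_candidate=seed_candidate,
    trainset=trainset,
    valset=valset,
    adapter=adapter,
    reflection_lm="gpt-4o",  # model that proposes improved texts
    max_metric_calls=150,
)
print(result.best_candidate)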

Initialize MCPAdapter.

Parameters:

tool_names (str | list[str], required): Name(s) of the tool(s) to optimize.
task_model (str | Callable, required): Model for task execution (litellm model string or callable).
metric_fn (Callable[[MCPDataInst, str], float], required): Scoring function: (data_inst, output) -> float.
server_params (StdioServerParameters | None, default None): Local MCP server configuration (stdio).
remote_url (str | None, default None): Remote MCP server URL.
remote_transport (str, default 'sse'): "sse" or "streamable_http".
remote_headers (dict[str, str] | None, default None): HTTP headers for remote requests (e.g., auth tokens).
remote_timeout (float, default 30): Timeout for remote HTTP operations.
base_system_prompt (str, default 'You are a helpful assistant with access to tools.'): Base system prompt template.
enable_two_pass (bool, default True): Use the two-pass workflow (tool call + answer generation).
failure_score (float, default 0.0): Score assigned when execution fails.
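
metric_fn receives the raw dataset item and the model's final text output and returns a float score. A minimal sketch, assuming items carry a reference_answer field as in the examples above; swap in whatever scoring fits your task.

def substring_metric(item: dict, output: str) -> float:
    # Full credit if the reference answer appears verbatim in the output,
    # half credit for a case-insensitive match, otherwise zero.
    reference = item["reference_answer"]
    if reference in output:
        return 1.0
    if reference.lower() in output.lower():
        return 0.5
    return 0.0
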
Source code in gepa/adapters/mcp_adapter/mcp_adapter.py
def __init__(
    self,
    tool_names: str | list[str],
    task_model: str | Callable,
    metric_fn: Callable[[MCPDataInst, str], float],
    # Local server configuration
    server_params: StdioServerParameters | None = None,
    # Remote server configuration
    remote_url: str | None = None,
    remote_transport: str = "sse",
    remote_headers: dict[str, str] | None = None,
    remote_timeout: float = 30,
    # Adapter configuration
    base_system_prompt: str = "You are a helpful assistant with access to tools.",
    enable_two_pass: bool = True,
    failure_score: float = 0.0,
):
    """
    Initialize MCPAdapter.

    Args:
        tool_names: Name(s) of tool(s) to optimize (str or list[str])
        task_model: Model for task execution (litellm string or callable)
        metric_fn: Scoring function: (data_inst, output) -> float
        server_params: Local MCP server configuration (stdio)
        remote_url: Remote MCP server URL
        remote_transport: "sse" or "streamable_http"
        remote_headers: HTTP headers for remote (e.g., auth tokens)
        remote_timeout: Timeout for remote HTTP operations
        base_system_prompt: Base system prompt template
        enable_two_pass: Use two-pass workflow (tool + answer)
        failure_score: Score assigned when execution fails
    """
    # Store transport configuration
    self.server_params = server_params
    self.remote_url = remote_url
    self.remote_transport = remote_transport
    self.remote_headers = remote_headers or {}
    self.remote_timeout = remote_timeout

    # Normalize tool_names to list
    self.tool_names = [tool_names] if isinstance(tool_names, str) else tool_names

    # Store adapter configuration
    self.base_system_prompt = base_system_prompt
    self.enable_two_pass = enable_two_pass
    self.failure_score = failure_score
    self.metric_fn = metric_fn

    # Setup model
    if isinstance(task_model, str):
        import litellm

        self.litellm = litellm
    self.task_model = task_model
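
When task_model is a string, the adapter imports litellm and treats it as a litellm model name; passing a callable lets you supply your own client instead. The sketch below wraps litellm behind such a callable. The assumed contract (chat messages in, generated text out) is not shown in this excerpt, so check it against the rest of mcp_adapter.py before relying on it.

from typing import Any

import litellm
from mcp import StdioServerParameters


def my_task_model(messages: list[dict[str, Any]], **kwargs: Any) -> str:
    # Assumed contract: receive the prepared chat messages, return the text reply.
    response = litellm.completion(model="gpt-4o-mini", messages=messages, **kwargs)
    return response.choices[0].message.content or ""


adapter = MCPAdapter(
    tool_names="read_file",
    task_model=my_task_model,  # callable instead of a litellm model string
    metric_fn=lambda item, output: float(item["reference_answer"] in output),
    server_params=StdioServerParameters(command="python", args=["server.py"]),
)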

Attributes

Instance attributes (set in __init__):

- server_params = server_params
- remote_url = remote_url
- remote_transport = remote_transport
- remote_headers = remote_headers or {}
- remote_timeout = remote_timeout
- tool_names = [tool_names] if isinstance(tool_names, str) else tool_names
- base_system_prompt = base_system_prompt
- enable_two_pass = enable_two_pass
- failure_score = failure_score
- metric_fn = metric_fn
- litellm = litellm (set only when task_model is a litellm model string)
- task_model = task_model

Class attribute (overridable per instance):

- propose_new_texts: ProposalFn | None = None

Functions

evaluate(batch: list[MCPDataInst], candidate: dict[str, str], capture_traces: bool = False) -> EvaluationBatch[MCPTrajectory, MCPOutput]

Evaluate candidate on batch using MCP tools.

Parameters:

batch (list[MCPDataInst], required): Dataset items to evaluate.
candidate (dict[str, str], required): Component mapping (e.g., {"tool_description": "..."}).
capture_traces (bool, default False): Whether to capture detailed trajectories.

Returns:

EvaluationBatch[MCPTrajectory, MCPOutput]: EvaluationBatch with outputs, scores, and optional trajectories.

Source code in gepa/adapters/mcp_adapter/mcp_adapter.py
def evaluate(
    self,
    batch: list[MCPDataInst],
    candidate: dict[str, str],
    capture_traces: bool = False,
) -> EvaluationBatch[MCPTrajectory, MCPOutput]:
    """
    Evaluate candidate on batch using MCP tools.

    Args:
        batch: Dataset items to evaluate
        candidate: Component mapping (e.g., {"tool_description": "..."})
        capture_traces: Whether to capture detailed trajectories

    Returns:
        EvaluationBatch with outputs, scores, and optional trajectories
    """
    return asyncio.run(self._evaluate_async(batch, candidate, capture_traces))
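
Because evaluate wraps the async implementation in asyncio.run, call it from a synchronous context (not inside an already running event loop). A short sketch of scoring a candidate directly, reusing the adapter and trainset assumed in the earlier examples; the trajectory fields are the ones read by make_reflective_dataset below.

candidate = {
    "tool_description": "Read a UTF-8 text file at the given path and return its contents.",
    "system_prompt": "You are a helpful assistant with access to tools.",
}

eval_batch = adapter.evaluate(trainset, candidate, capture_traces=True)

for item, score, traj in zip(trainset, eval_batch.scores, eval_batch.trajectories or []):
    print(item["user_query"], score, traj["selected_tool"], traj["tool_called"])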

make_reflective_dataset(candidate: dict[str, str], eval_batch: EvaluationBatch[MCPTrajectory, MCPOutput], components_to_update: list[str]) -> dict[str, list[dict[str, Any]]]

Build reflective dataset for instruction refinement.

Parameters:

candidate (dict[str, str], required): Current candidate components.
eval_batch (EvaluationBatch[MCPTrajectory, MCPOutput], required): Evaluation results with trajectories.
components_to_update (list[str], required): Which components to generate data for.

Returns:

dict[str, list[dict[str, Any]]]: Dictionary mapping component names to reflective examples.

Source code in gepa/adapters/mcp_adapter/mcp_adapter.py
def make_reflective_dataset(
    self,
    candidate: dict[str, str],
    eval_batch: EvaluationBatch[MCPTrajectory, MCPOutput],
    components_to_update: list[str],
) -> dict[str, list[dict[str, Any]]]:
    """
    Build reflective dataset for instruction refinement.

    Args:
        candidate: Current candidate components
        eval_batch: Evaluation results with trajectories
        components_to_update: Which components to generate data for

    Returns:
        Dictionary mapping component names to reflective examples
    """
    reflective_data: dict[str, list[dict[str, Any]]] = {}

    for component in components_to_update:
        examples: list[dict[str, Any]] = []

        for traj, score, _output in zip(
            eval_batch.trajectories or [],
            eval_batch.scores,
            eval_batch.outputs,
            strict=False,
        ):
            if component == "tool_description":
                feedback = self._generate_tool_feedback(traj, score)
                examples.append(
                    {
                        "Inputs": {
                            "user_query": traj["user_query"],
                            "tool_description": traj["tool_description_used"],
                        },
                        "Generated Outputs": {
                            "tool_called": traj["tool_called"],
                            "selected_tool": traj["selected_tool"],
                            "tool_arguments": traj["tool_arguments"],
                            "final_answer": traj["model_final_output"],
                        },
                        "Feedback": feedback,
                    }
                )

            elif component == "system_prompt":
                feedback = self._generate_system_prompt_feedback(traj, score)
                examples.append(
                    {
                        "Inputs": {
                            "user_query": traj["user_query"],
                            "system_prompt": traj["system_prompt_used"],
                        },
                        "Generated Outputs": traj["model_final_output"],
                        "Feedback": feedback,
                    }
                )

        reflective_data[component] = examples

    return reflective_data
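
The returned mapping is what GEPA's reflection step consumes when proposing new component texts. A small sketch of reading it back, with field names taken from the construction above and the candidate/eval_batch variables assumed from the evaluate example:

reflective = adapter.make_reflective_dataset(
    candidate,
    eval_batch,  # produce with capture_traces=True so trajectories are present
    components_to_update=["tool_description", "system_prompt"],
)

for example in reflective["tool_description"]:
    print("Query:   ", example["Inputs"]["user_query"])
    print("Tool:    ", example["Generated Outputs"]["selected_tool"])
    print("Feedback:", example["Feedback"])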