From 500b37bd2611acd9230643a8a125da9c420d3954 Mon Sep 17 00:00:00 2001 From: chacha Date: Tue, 14 Apr 2026 00:16:59 +0200 Subject: [PATCH] update --- .vscode/extensions.json | 7 + .vscode/launch.json | 46 +++++++ .vscode/settings.json | 22 ++- .vscode/tasks.json | 37 +++++ README.md | 108 +++++++++++++-- docs/SPEC.md | 235 ++++++++++++++++++++++++++++++++ examples/config.example.json | 75 +++++----- pyMCPBroker/__init__.py | 2 +- pyMCPBroker/app.py | 9 +- pyMCPBroker/backend_stdio.py | 11 +- pyMCPBroker/broker.py | 133 +++++++----------- pyMCPBroker/main.py | 35 +++-- pyMCPBroker/models.py | 37 ++++- pyMCPBroker/overrides.py | 14 +- pyMCPBroker/tree.py | 127 ++++++++++++----- pyproject.toml | 4 +- smoke_config.json | 10 +- tests/test_broker_end_to_end.py | 135 ++++++++++++------ tests/test_config.py | 57 +++++++- tests/test_filters.py | 6 + 20 files changed, 842 insertions(+), 268 deletions(-) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json create mode 100644 docs/SPEC.md diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..e9e20f2 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.debugpy" + ] +} diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..b18a640 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,46 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: pyMCPBroker (smoke config)", + "type": "debugpy", + "request": "launch", + "module": "pyMCPBroker", + "cwd": "${workspaceFolder}", + "args": [ + "127.0.0.1:8100", + "smoke_config.json", + "--log-level", + "debug" + ], + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Python: pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "cwd": "${workspaceFolder}", + "args": [ + "tests", + "-q" + 
], + "console": "integratedTerminal", + "justMyCode": false + }, + { + "name": "Python: pytest current file", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "cwd": "${workspaceFolder}", + "args": [ + "${relativeFile}", + "-q" + ], + "console": "integratedTerminal", + "justMyCode": false + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 9b38853..d4e65dd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,17 @@ { - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "python.testing.pytestArgs": [ + "tests", + "-q" + ], + "python.testing.cwd": "${workspaceFolder}", + "python.analysis.extraPaths": [ + "${workspaceFolder}" + ], + "python.envFile": "${workspaceFolder}/.env", + "files.exclude": { + "**/__pycache__": true, + "**/.pytest_cache": true + } +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..dbacfea --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,37 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Tests: pytest", + "type": "shell", + "command": "python -m pytest tests -q", + "options": { + "cwd": "${workspaceFolder}" + }, + "group": { + "kind": "test", + "isDefault": true + }, + "problemMatcher": [] + }, + { + "label": "Tests: current file", + "type": "shell", + "command": "python -m pytest ${relativeFile} -q", + "options": { + "cwd": "${workspaceFolder}" + }, + "group": "test", + "problemMatcher": [] + }, + { + "label": "Run: broker (smoke config)", + "type": "shell", + "command": "python -m pyMCPBroker 127.0.0.1:8100 smoke_config.json --log-level info", + "options": { + "cwd": "${workspaceFolder}" + }, + "problemMatcher": [] + } + ] +} diff --git a/README.md b/README.md index dae0c5b..43783b5 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,36 @@ # 
pyMCPBroker -Small FastAPI broker exposing three stable meta-tools over MCP backends: +Small FastAPI broker exposing three stable meta-tools over MCP `stdio` sources: - `meta_tree` - `meta_desc` - `meta_call` -## Scope +The broker does not expose raw MCP tools directly. A model first discovers paths with `meta_tree`, inspects a leaf with `meta_desc`, then executes it with `meta_call`. -Current implementation: +## What is implemented -- MCP `stdio` backend only -- persistent subprocess per backend -- `initialize` + `notifications/initialized` -- paginated `tools/list` -- `tools/call` -- allow/deny wildcard filters -- backend overrides +- MCP `stdio` sources only +- persistent subprocess per unique source command +- `initialize`, `notifications/initialized`, `tools/list`, `tools/call` +- source-side tool auto-discovery +- optional allow/deny glob filters +- optional tool overrides +- optional path aliases - broker-side JSON Schema validation -- compact structured error payloads -- result truncation with shape-preserving best effort -- optional shared secret (`Authorization: Bearer ` or `X-Api-Key`) +- structured errors +- compact result truncation +- optional shared secret + +## Install + +```bash +pip install . +``` ## Run ```bash -pip install . python -m pyMCPBroker 0.0.0.0:8100 /config.json ``` @@ -35,4 +40,77 @@ Optional shared secret: python -m pyMCPBroker 0.0.0.0:8100 /config.json mysecret ``` -Example config: `examples/config.example.json` +## Config model + +User-facing config is tree-first. You mount a real source inline on a node with `source`. The broker discovers its tools automatically and exposes them as child leaves of that node. + +A declared root node is optional. If `tree` is a list, `/` is created implicitly. 
+ +Example: + +```json +{ + "tree": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations", + "children": [ + { + "path": "/repo/read", + "type": "node", + "summary": "Read repository data", + "source": { + "backend": "stdio", + "command": "/opt/gitea-mcp/gitea-mcp --host ${GITEA_URL} --token ${GITEA_TOKEN}", + "tool_filter": ["get_*", "list_*", "search_*", "!delete_*", "!create_*"], + "path_aliases": { + "get_file_contents": "get_file" + }, + "tool_overrides": { + "get_file_contents": { + "summary": "Read one file from a repository", + "max_output_chars": 12000, + "example_args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md" + } + } + } + } + } + ] + } + ] +} +``` + +With that config, the broker auto-exposes paths such as: + +- `/repo/read/get_file` +- `/repo/read/list_branches` +- `/repo/read/search_code` + +depending on what the mounted MCP source actually reports through `tools/list`. + +## Notes + +- `tool_filter` is optional. If omitted, all tools from the source are exposed. +- `tool_overrides` is optional. +- filter semantics are unordered: + - positive patterns allow + - `!pattern` denies + - if there is no positive pattern, all tools are allowed first, then deny rules are applied +- `path_aliases` only renames exposed leaf names; it does not change the real MCP tool name. + +## Files + +- `docs/SPEC.md`: user-facing spec for the implemented behavior +- `examples/config.example.json`: example config + + +## VS Code + +The repository includes a minimal `.vscode/` setup for running and debugging pytest and for launching the broker with `smoke_config.json`. diff --git a/docs/SPEC.md b/docs/SPEC.md new file mode 100644 index 0000000..999a870 --- /dev/null +++ b/docs/SPEC.md @@ -0,0 +1,235 @@ +# pyMCPBroker Specification + +## Purpose + +pyMCPBroker exposes a very small stable API to a language model while wrapping one or more MCP servers running over `stdio`. 
+ +The model never sees raw MCP tools directly. It only sees three stable meta-tools: + +- `meta_tree` +- `meta_desc` +- `meta_call` + +Normal workflow: + +1. call `meta_tree` to navigate +2. call `meta_desc` on a leaf path +3. call `meta_call` with arguments matching the schema returned by `meta_desc` + +## Terminology + +- **entry**: logical element exposed to the model +- `type=node`: navigation node +- `type=tool`: callable leaf +- **path**: stable absolute URL-like identifier +- **source**: real MCP backend mounted on a node +- `backend=stdio`: MCP process launched locally over stdin/stdout + +## Public API + +### `POST /meta_tree` + +Input: + +```json +{ + "path": "/" +} +``` + +Returns the direct children of a node path. + +Returns an error if the path does not exist or if it points to a tool leaf. + +### `POST /meta_desc` + +Input: + +```json +{ + "path": "/repo/read/get_file" +} +``` + +For a node path, returns node metadata and optionally summarized children. + +For a tool path, returns: + +- stable path +- summary +- description +- exact `args_schema` +- optional `example_args` + +### `POST /meta_call` + +Input: + +```json +{ + "path": "/repo/read/get_file", + "args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md" + } +} +``` + +The broker validates `args` against the dynamic schema previously returned by `meta_desc`, then calls the real MCP tool. + +## Config format + +The config is static JSON loaded at startup. + +Top-level shape: + +```json +{ + "tree": [ ... ] +} +``` + +or: + +```json +{ + "tree": { + "path": "/", + "type": "node", + "children": [ ... ] + } +} +``` + +The explicit root node is optional. If omitted, `/` is created implicitly. 
+ +### Node fields + +- `path` +- `type="node"` +- `summary` optional +- `description` optional +- `children` optional +- `source` optional + +### Source fields + +- `backend`: currently only `"stdio"` +- `command`: shell command to launch the MCP server +- `tool_filter`: optional unordered list of allow/deny glob patterns +- `tool_overrides`: optional per-tool overrides +- `path_aliases`: optional mapping from real MCP tool name to exposed leaf name + +### Environment variables + +`${ENV_VAR}` substitution is supported in strings, especially in `command`. + +If a referenced variable is not set, the broker fails at startup. + +## Source mounting model + +A `source` mounted on a node causes the broker to: + +1. start the MCP process +2. initialize the MCP session +3. fetch `tools/list` +4. apply `tool_filter` +5. apply `tool_overrides` +6. expose the remaining tools as child leaves under the node path + +The exposed leaf path is: + +- `parent_path + / + alias`, if `path_aliases` defines one +- otherwise `parent_path + / + tool_name` + +Example: + +- node path: `/repo/read` +- real tool: `get_file_contents` +- alias: `get_file` +- exposed path: `/repo/read/get_file` + +## Filter semantics + +`tool_filter` is optional. + +Positive patterns allow tools. Patterns prefixed with `!` deny tools. + +Rules: + +- if there is no positive pattern, all tools are allowed first, then deny rules are applied +- if at least one positive pattern exists, only tools matching a positive pattern are allowed, then deny rules are applied +- pattern order does not matter + +Examples: + +- `[]` → expose all tools +- `["!delete_*"]` → expose everything except delete tools +- `["get_*", "list_*"]` → expose only get/list tools +- `["get_*", "!get_secret_*"]` → expose get tools except secret ones + +## Overrides + +`tool_overrides` is optional. 
+ +Supported fields: + +- `summary` +- `description` +- `max_output_chars` +- `timeout` +- `example_args` +- `render_mode` + +They only affect the broker-facing presentation and execution limits. They do not rename the real MCP tool. + +## Output normalization + +The broker can truncate large outputs using `max_output_chars`. + +Current behavior: + +- preserve JSON structure when possible +- truncate long strings first +- compact long lists if needed +- return an explicit wrapper when truncation happened + +## Internal MCP support + +Current transport support: + +- MCP `stdio` only + +Required MCP methods: + +- `initialize` +- `tools/list` +- `tools/call` + +The broker also sends `notifications/initialized` after initialization. + +## CLI + +```bash +python -m pyMCPBroker 0.0.0.0:8100 /config.json +``` + +Optional shared secret: + +```bash +python -m pyMCPBroker 0.0.0.0:8100 /config.json mysecret +``` + +Accepted options: + +- `--reload` +- `--ignore-broken-tool` +- `--log-level` +- `--dump-tree` + + +## Repository editor config + +The repository may include a `.vscode/` directory with recommended Python extensions plus launch/task settings for pytest and for starting the broker against `smoke_config.json`. This editor config is optional and does not affect runtime behavior. 
diff --git a/examples/config.example.json b/examples/config.example.json index 112b164..0bb9701 100644 --- a/examples/config.example.json +++ b/examples/config.example.json @@ -1,49 +1,36 @@ { - "backends": { - "gitea": { - "backend": "stdio", - "command": "/opt/gitea-mcp/gitea-mcp --host ${GITEA_URL} --token ${GITEA_TOKEN}", - "tool_filter": ["get_*", "list_*", "search_*", "!delete_*", "!create_*"], - "tool_overrides": { - "get_file_contents": { - "summary": "Read one file from a repository", - "max_output_chars": 12000, - "example_args": { - "owner": "myorg", - "repo": "demo-repo", - "ref": "main", - "filePath": "README.md" + "tree": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations", + "children": [ + { + "path": "/repo/read", + "type": "node", + "summary": "Read repository data", + "source": { + "backend": "stdio", + "command": "/opt/gitea-mcp/gitea-mcp --host ${GITEA_URL} --token ${GITEA_TOKEN}", + "tool_filter": ["get_*", "list_*", "search_*", "!delete_*", "!create_*"], + "path_aliases": { + "get_file_contents": "get_file" + }, + "tool_overrides": { + "get_file_contents": { + "summary": "Read one file from a repository", + "max_output_chars": 12000, + "example_args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md" + } + } + } } } - } + ] } - }, - "tree": { - "path": "/", - "type": "node", - "summary": "Root", - "children": [ - { - "path": "/repo", - "type": "node", - "summary": "Repository operations", - "children": [ - { - "path": "/repo/read", - "type": "node", - "summary": "Read repository data", - "children": [ - { - "path": "/repo/read/get_file", - "type": "tool", - "summary": "Read one file", - "backend_ref": "gitea", - "tool_name": "get_file_contents" - } - ] - } - ] - } - ] - } + ] } diff --git a/pyMCPBroker/__init__.py b/pyMCPBroker/__init__.py index 07c5de9..dea755d 100644 --- a/pyMCPBroker/__init__.py +++ b/pyMCPBroker/__init__.py @@ -1,2 +1,2 @@ __all__ = ["__version__"] 
-__version__ = "0.1.0" +__version__ = "0.2.1" diff --git a/pyMCPBroker/app.py b/pyMCPBroker/app.py index ffe2b4b..3bdcca2 100644 --- a/pyMCPBroker/app.py +++ b/pyMCPBroker/app.py @@ -5,6 +5,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI, Request from fastapi.responses import JSONResponse +from . import __version__ from .broker import Broker, BrokerError from .models import MetaCallRequest, MetaDescRequest, MetaTreeRequest @@ -20,9 +21,9 @@ def create_app(broker: Broker, shared_secret: str | None = None) -> FastAPI: app = FastAPI( title="pyMCPBroker", - version="0.1.0", + version=__version__, description=( - "Expose three stable meta-tools over MCP backends. " + "Expose three stable meta-tools over MCP stdio sources. " "Use meta_tree to navigate paths, meta_desc to inspect a path, and meta_call to execute a tool path." ), lifespan=lifespan, @@ -40,13 +41,13 @@ def create_app(broker: Broker, shared_secret: str | None = None) -> FastAPI: @app.exception_handler(BrokerError) async def broker_error_handler(_: Request, exc: BrokerError) -> JSONResponse: status = 400 - if exc.error_code in {"backend_unavailable", "backend_call_failed", "missing_backend_tool"}: + if exc.error_code in {"backend_unavailable", "backend_call_failed", "empty_source"}: status = 502 return JSONResponse(exc.as_payload(), status_code=status) @app.get("/") async def root() -> dict[str, object]: - return {"ok": True, "service": "pyMCPBroker", "meta_tools": ["meta_tree", "meta_desc", "meta_call"]} + return {"ok": True, "service": "pyMCPBroker", "version": __version__, "meta_tools": ["meta_tree", "meta_desc", "meta_call"]} @app.post( "/meta_tree", diff --git a/pyMCPBroker/backend_stdio.py b/pyMCPBroker/backend_stdio.py index 055fa58..cb20225 100644 --- a/pyMCPBroker/backend_stdio.py +++ b/pyMCPBroker/backend_stdio.py @@ -4,11 +4,12 @@ import json import logging import subprocess import threading -import time from dataclasses import dataclass, field from queue import Queue 
from typing import Any +from . import __version__ + logger = logging.getLogger(__name__) MCP_PROTOCOL_VERSION = "2025-06-18" @@ -61,7 +62,7 @@ class MCPStdioBackend: { "protocolVersion": MCP_PROTOCOL_VERSION, "capabilities": {}, - "clientInfo": {"name": "pyMCPBroker", "version": "0.1.0"}, + "clientInfo": {"name": "pyMCPBroker", "version": __version__}, }, timeout=10, ) @@ -181,8 +182,4 @@ class MCPStdioBackend: return tools def call_tool(self, tool_name: str, arguments: dict[str, Any], timeout: float = 30) -> dict[str, Any]: - return self.request( - "tools/call", - params={"name": tool_name, "arguments": arguments}, - timeout=timeout, - ) + return self.request("tools/call", params={"name": tool_name, "arguments": arguments}, timeout=timeout) diff --git a/pyMCPBroker/broker.py b/pyMCPBroker/broker.py index baf6d8f..a526d98 100644 --- a/pyMCPBroker/broker.py +++ b/pyMCPBroker/broker.py @@ -8,11 +8,9 @@ from jsonschema import Draft202012Validator from jsonschema.exceptions import ValidationError from .backend_stdio import MCPError, MCPStdioBackend -from .filters import is_allowed -from .models import BackendConfig, BackendOverride, NodeEntry, RootConfig, ToolEntry -from .overrides import apply_tool_overrides +from .models import NodeEntry, RootConfig, ToolEntry from .render import normalize_result -from .tree import EntryIndex, TreeError, build_tree, normalize_path +from .tree import EntryIndex, SourceRegistry, TreeError, build_tree, normalize_path logger = logging.getLogger(__name__) @@ -34,93 +32,80 @@ class Broker: def __init__(self, config: RootConfig, ignore_broken_tool: bool = False) -> None: self.config = config self.ignore_broken_tool = ignore_broken_tool - self.index: EntryIndex = build_tree(config.tree, set(config.backends)) + self.index: EntryIndex | None = None + self.sources: SourceRegistry | None = None self.backends: dict[str, MCPStdioBackend] = {} - self.tools_by_backend: dict[str, dict[str, dict[str, Any]]] = {} + self.raw_tools_by_backend: dict[str, 
list[dict[str, Any]]] = {} self.broken_backends: dict[str, str] = {} def startup(self) -> None: - for name, backend_cfg in self.config.backends.items(): + provisional_index, sources = build_tree(self.config, {}) + self.index = provisional_index + self.sources = sources + for key, source_cfg in sources.items(): try: - backend = self._start_backend(name, backend_cfg) - self.backends[name] = backend + backend = MCPStdioBackend(name=key, command=source_cfg.command) + backend.start() + self.backends[key] = backend + self.raw_tools_by_backend[key] = backend.list_tools(timeout=30) except Exception as exc: if not self.ignore_broken_tool: raise - self.broken_backends[name] = str(exc) - logger.warning("Skipping broken backend %s: %s", name, exc) - self._validate_tree_links() + self.broken_backends[key] = str(exc) + logger.warning("Skipping broken backend %s: %s", key, exc) + self.raw_tools_by_backend[key] = [] + self.index, self.sources = build_tree(self.config, self.raw_tools_by_backend) + if not self.ignore_broken_tool: + self._validate_all_sources_have_visible_tools() def shutdown(self) -> None: for backend in self.backends.values(): backend.close() self.backends.clear() + self.raw_tools_by_backend.clear() - def _start_backend(self, name: str, backend_cfg: BackendConfig) -> MCPStdioBackend: - backend = MCPStdioBackend(name=name, command=backend_cfg.command) - backend.start() - raw_tools = backend.list_tools(timeout=30) - filtered: dict[str, dict[str, Any]] = {} - for tool in raw_tools: - tool_name = tool.get("name") - if not tool_name or not is_allowed(tool_name, backend_cfg.tool_filter): - continue - override = backend_cfg.tool_overrides.get(tool_name) - filtered[tool_name] = apply_tool_overrides(tool, override) - self.tools_by_backend[name] = filtered - return backend - - def _validate_tree_links(self) -> None: - for path, entry in self.index.by_path.items(): - if not isinstance(entry, ToolEntry): - continue - if entry.backend_ref in self.broken_backends: - continue - 
tools = self.tools_by_backend.get(entry.backend_ref, {}) - if entry.tool_name not in tools: - raise BrokerError( - "missing_backend_tool", - f"Tree path {path} targets missing or filtered tool {entry.tool_name!r}", - ) + def _validate_all_sources_have_visible_tools(self) -> None: + assert self.index is not None + for entry in self.index.by_path.values(): + if isinstance(entry, NodeEntry) and entry.source is not None: + child_tools = [child for child in entry.children if isinstance(child, ToolEntry)] + if not child_tools: + raise BrokerError("empty_source", f"Node source exposes no tools: {entry.path}") def _resolve_entry(self, path: str) -> NodeEntry | ToolEntry: + if self.index is None: + raise BrokerError("internal_error", "Broker not started") try: entry = self.index.get(path) except TreeError as exc: raise BrokerError("unknown_path", str(exc)) from exc - if isinstance(entry, NodeEntry) or isinstance(entry, ToolEntry): + if isinstance(entry, (NodeEntry, ToolEntry)): return entry raise BrokerError("internal_error", f"Unsupported entry type for {path}") - def _resolve_tool(self, path: str) -> tuple[ToolEntry, dict[str, Any], BackendConfig, MCPStdioBackend]: + def _resolve_tool(self, path: str) -> tuple[ToolEntry, MCPStdioBackend]: entry = self._resolve_entry(path) if not isinstance(entry, ToolEntry): raise BrokerError("not_a_tool", f"Path is not a tool: {normalize_path(path)}") - if entry.backend_ref in self.broken_backends: + if entry.backend_key in self.broken_backends: raise BrokerError( "backend_unavailable", - f"Backend {entry.backend_ref!r} is unavailable: {self.broken_backends[entry.backend_ref]}", + f"Backend for {entry.path!r} is unavailable: {self.broken_backends[entry.backend_key]}", ) - tool = self.tools_by_backend.get(entry.backend_ref, {}).get(entry.tool_name) - backend_cfg = self.config.backends[entry.backend_ref] - backend = self.backends[entry.backend_ref] - if tool is None: - raise BrokerError("missing_backend_tool", f"Tool is not available: 
{entry.tool_name}") - return entry, tool, backend_cfg, backend + backend = self.backends.get(entry.backend_key) + if backend is None: + raise BrokerError("backend_unavailable", f"Backend is not available for path: {entry.path}") + return entry, backend def meta_tree(self, path: str) -> dict[str, Any]: entry = self._resolve_entry(path) if not isinstance(entry, NodeEntry): raise BrokerError("not_a_node", f"Path is not a node: {normalize_path(path)}") - children = [ - {"path": child.path, "type": child.type, "summary": child.summary} - for child in entry.children - ] return { "ok": True, "path": entry.path, "type": "node", - "children": children, + "children": [{"path": child.path, "type": child.type, "summary": child.summary} for child in entry.children], "usage_hint": "Use meta_desc on a leaf path before meta_call.", } @@ -136,53 +121,39 @@ class Broker: "usage_hint": "Use meta_tree to navigate child paths.", } if entry.children: - payload["children"] = [ - {"path": child.path, "type": child.type, "summary": child.summary} - for child in entry.children - ] + payload["children"] = [{"path": child.path, "type": child.type, "summary": child.summary} for child in entry.children] return payload - tool_entry, tool, _, _ = self._resolve_tool(path) - args_schema = tool.get("inputSchema") or {"type": "object", "properties": {}} - summary = tool.get("_broker_summary") or tool_entry.summary or tool.get("title") or tool_entry.tool_name - description = tool_entry.description or tool.get("description", "") + args_schema = entry.tool_meta.get("inputSchema") or {"type": "object", "properties": {}} payload = { "ok": True, - "path": tool_entry.path, + "path": entry.path, "type": "tool", - "summary": summary, - "description": description, + "summary": entry.summary, + "description": entry.description, "args_schema": args_schema, "usage_hint": "Call meta_call with this path and args matching args_schema.", } - example_args = tool.get("_broker_example_args") + example_args = 
entry.tool_meta.get("_broker_example_args") if example_args is not None: payload["example_args"] = example_args return payload def meta_call(self, path: str, args: dict[str, Any]) -> dict[str, Any]: - tool_entry, tool, _, backend = self._resolve_tool(path) - schema = tool.get("inputSchema") or {"type": "object", "properties": {}} + entry, backend = self._resolve_tool(path) + schema = entry.tool_meta.get("inputSchema") or {"type": "object", "properties": {}} try: Draft202012Validator(schema).validate(args) except ValidationError as exc: raise BrokerError( "invalid_arguments", exc.message, - { - "path": tool_entry.path, - "required": list(schema.get("required", [])), - "usage_hint": "Call meta_desc on the same path before retrying.", - }, + {"path": entry.path, "required": list(schema.get("required", [])), "usage_hint": "Call meta_desc on the same path before retrying."}, ) from exc - timeout = float(tool.get("_broker_timeout") or 30) - max_output_chars = tool.get("_broker_max_output_chars") + timeout = float(entry.tool_meta.get("_broker_timeout") or 30) + max_output_chars = entry.tool_meta.get("_broker_max_output_chars") try: - result = backend.call_tool(tool_entry.tool_name, args, timeout=timeout) + result = backend.call_tool(entry.tool_name, args, timeout=timeout) except MCPError as exc: - raise BrokerError("backend_call_failed", str(exc), {"path": tool_entry.path}) from exc - return { - "ok": True, - "path": tool_entry.path, - "result": normalize_result(result, max_output_chars=max_output_chars), - } + raise BrokerError("backend_call_failed", str(exc), {"path": entry.path}) from exc + return {"ok": True, "path": entry.path, "result": normalize_result(result, max_output_chars=max_output_chars)} diff --git a/pyMCPBroker/main.py b/pyMCPBroker/main.py index b4a1e1f..1b8dafa 100644 --- a/pyMCPBroker/main.py +++ b/pyMCPBroker/main.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse import json import logging -from pathlib import Path import uvicorn @@ 
-12,21 +11,18 @@ from .broker import Broker from .config import load_config - -def _parse_bind(value: str) -> tuple[str, int]: - if ":" not in value: - raise argparse.ArgumentTypeError("Bind address must be HOST:PORT") - host, port = value.rsplit(":", 1) - if not host: - raise argparse.ArgumentTypeError("Missing host") +def _parse_bind(bind: str) -> tuple[str, int]: + host, sep, port = bind.rpartition(":") + if not sep or not host or not port: + raise SystemExit(f"Invalid bind address: {bind!r}. Expected HOST:PORT") return host, int(port) - + def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(prog="pyMCPBroker") - parser.add_argument("bind") - parser.add_argument("config_path") - parser.add_argument("shared_secret", nargs="?") + parser = argparse.ArgumentParser(prog="python -m pyMCPBroker") + parser.add_argument("bind", help="Bind host and port, format HOST:PORT") + parser.add_argument("config_path", help="Path to config JSON") + parser.add_argument("shared_secret", nargs="?", default=None, help="Optional shared secret") parser.add_argument("--reload", action="store_true") parser.add_argument("--ignore-broken-tool", action="store_true") parser.add_argument("--log-level", default="info") @@ -42,7 +38,18 @@ def main(argv: list[str] | None = None) -> int: config = load_config(args.config_path) broker = Broker(config, ignore_broken_tool=args.ignore_broken_tool) if args.dump_tree: - print(json.dumps(config.tree, indent=2, ensure_ascii=False)) + broker.startup() + try: + assert broker.index is not None + print( + json.dumps( + broker.meta_tree("/"), + indent=2, + ensure_ascii=False, + ) + ) + finally: + broker.shutdown() return 0 app = create_app(broker, shared_secret=args.shared_secret) uvicorn.run(app, host=host, port=port, reload=args.reload, log_level=str(args.log_level).lower()) diff --git a/pyMCPBroker/models.py b/pyMCPBroker/models.py index f3ba92d..985b83b 100644 --- a/pyMCPBroker/models.py +++ b/pyMCPBroker/models.py @@ -3,7 +3,7 
@@ from __future__ import annotations from dataclasses import dataclass, field from typing import Any, Literal -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class MetaTreeRequest(BaseModel): @@ -19,7 +19,7 @@ class MetaCallRequest(BaseModel): args: dict[str, Any] = Field(default_factory=dict) -class BackendOverride(BaseModel): +class ToolOverride(BaseModel): summary: str | None = None description: str | None = None max_output_chars: int | None = None @@ -28,16 +28,37 @@ class BackendOverride(BaseModel): render_mode: str | None = None -class BackendConfig(BaseModel): +class SourceConfig(BaseModel): backend: Literal["stdio"] command: str tool_filter: list[str] = Field(default_factory=list) - tool_overrides: dict[str, BackendOverride] = Field(default_factory=dict) + tool_overrides: dict[str, ToolOverride] = Field(default_factory=dict) + path_aliases: dict[str, str] = Field(default_factory=dict) + + @model_validator(mode="after") + def validate_aliases(self) -> "SourceConfig": + for tool_name, alias in self.path_aliases.items(): + if not tool_name: + raise ValueError("path_aliases keys must not be empty") + if not alias or "/" in alias or alias in {".", ".."}: + raise ValueError(f"Invalid path alias for tool {tool_name!r}: {alias!r}") + return self + + +class NodeConfig(BaseModel): + path: str + type: Literal["node"] = "node" + summary: str = "" + description: str = "" + children: list["NodeConfig"] = Field(default_factory=list) + source: SourceConfig | None = None + + +NodeConfig.model_rebuild() class RootConfig(BaseModel): - backends: dict[str, BackendConfig] - tree: dict[str, Any] + tree: NodeConfig | list[NodeConfig] @dataclass(slots=True) @@ -51,9 +72,11 @@ class Entry: @dataclass(slots=True) class NodeEntry(Entry): children: list[Entry] = field(default_factory=list) + source: SourceConfig | None = None @dataclass(slots=True) class ToolEntry(Entry): - backend_ref: str = "" + backend_key: str = "" tool_name: str = "" 
+ tool_meta: dict[str, Any] = field(default_factory=dict) diff --git a/pyMCPBroker/overrides.py b/pyMCPBroker/overrides.py index f68b9cb..657600b 100644 --- a/pyMCPBroker/overrides.py +++ b/pyMCPBroker/overrides.py @@ -2,23 +2,23 @@ from __future__ import annotations from typing import Any -from .models import BackendOverride +from .models import ToolOverride -def apply_tool_overrides(tool: dict[str, Any], override: BackendOverride | None) -> dict[str, Any]: +def apply_tool_overrides(tool: dict[str, Any], override: ToolOverride | None) -> dict[str, Any]: merged = dict(tool) - if not override: + if override is None: return merged if override.summary is not None: merged["_broker_summary"] = override.summary if override.description is not None: merged["description"] = override.description + if override.max_output_chars is not None: + merged["_broker_max_output_chars"] = int(override.max_output_chars) + if override.timeout is not None: + merged["_broker_timeout"] = float(override.timeout) if override.example_args is not None: merged["_broker_example_args"] = override.example_args - if override.max_output_chars is not None: - merged["_broker_max_output_chars"] = override.max_output_chars - if override.timeout is not None: - merged["_broker_timeout"] = override.timeout if override.render_mode is not None: merged["_broker_render_mode"] = override.render_mode return merged diff --git a/pyMCPBroker/tree.py b/pyMCPBroker/tree.py index 798250f..79bd355 100644 --- a/pyMCPBroker/tree.py +++ b/pyMCPBroker/tree.py @@ -1,8 +1,12 @@ from __future__ import annotations -from typing import Any +import json +import hashlib +from typing import Any, Iterable -from .models import Entry, NodeEntry, ToolEntry +from .filters import is_allowed +from .models import Entry, NodeConfig, NodeEntry, RootConfig, SourceConfig, ToolEntry +from .overrides import apply_tool_overrides class TreeError(ValueError): @@ -22,6 +26,24 @@ class EntryIndex: raise TreeError(f"Unknown path: {normalized}") from exc 
+class SourceRegistry: + def __init__(self) -> None: + self._by_key: dict[str, SourceConfig] = {} + + def key_for(self, source: SourceConfig) -> str: + payload = json.dumps( + {"backend": source.backend, "command": source.command}, + sort_keys=True, + separators=(",", ":"), + ) + key = hashlib.sha1(payload.encode("utf-8")).hexdigest()[:12] + self._by_key.setdefault(key, source) + return key + + def items(self) -> Iterable[tuple[str, SourceConfig]]: + return self._by_key.items() + + def normalize_path(path: str) -> str: if not path: raise TreeError("Path must not be empty") @@ -34,40 +56,81 @@ def normalize_path(path: str) -> str: return path -def build_tree(raw: dict[str, Any], known_backends: set[str]) -> EntryIndex: - by_path: dict[str, Entry] = {} +def join_path(parent: str, child_name: str) -> str: + if not child_name or "/" in child_name: + raise TreeError(f"Invalid child path segment: {child_name!r}") + return f"/{child_name}" if parent == "/" else f"{parent}/{child_name}" - def parse(node: dict[str, Any]) -> Entry: - entry_type = node.get("type") - path = normalize_path(node["path"]) - if path in by_path: - raise TreeError(f"Duplicate path: {path}") - summary = node.get("summary", "") - description = node.get("description", "") - if entry_type == "node": - entry = NodeEntry(path=path, type="node", summary=summary, description=description) - by_path[path] = entry - entry.children = [parse(child) for child in node.get("children", [])] - return entry - if entry_type == "tool": - backend_ref = node.get("backend_ref", "") - if backend_ref not in known_backends: - raise TreeError(f"Unknown backend_ref {backend_ref!r} for {path}") + +def build_tree(config: RootConfig, raw_tools_by_backend: dict[str, list[dict[str, Any]]]) -> tuple[EntryIndex, SourceRegistry]: + by_path: dict[str, Entry] = {} + sources = SourceRegistry() + + def add_entry(entry: Entry) -> None: + if entry.path in by_path: + raise TreeError(f"Duplicate path: {entry.path}") + by_path[entry.path] = 
entry + + if isinstance(config.tree, list): + root_cfg = NodeConfig(path="/", type="node", summary="Root", description="", children=config.tree) + else: + root_cfg = config.tree + if normalize_path(root_cfg.path) != "/": + raise TreeError("Explicit tree object must be a node at /") + + root = NodeEntry(path="/", type="node", summary=root_cfg.summary or "Root", description=root_cfg.description, source=root_cfg.source) + add_entry(root) + + def parse_node(node_cfg: NodeConfig) -> NodeEntry: + if node_cfg.type != "node": + raise TreeError(f"Invalid node type for {node_cfg.path}: {node_cfg.type!r}") + path = normalize_path(node_cfg.path) + node = NodeEntry( + path=path, + type="node", + summary=node_cfg.summary, + description=node_cfg.description, + source=node_cfg.source, + ) + add_entry(node) + for child_cfg in node_cfg.children: + child = parse_node(child_cfg) + node.children.append(child) + if node_cfg.source: + node.children.extend(_generate_source_children(node, node_cfg.source)) + return node + + def _generate_source_children(parent: NodeEntry, source: SourceConfig) -> list[ToolEntry]: + backend_key = sources.key_for(source) + raw_tools = raw_tools_by_backend.get(backend_key, []) + entries: list[ToolEntry] = [] + for tool in raw_tools: + tool_name = tool.get("name") + if not tool_name or not is_allowed(tool_name, source.tool_filter): + continue + alias = source.path_aliases.get(tool_name, tool_name) + child_path = join_path(parent.path, alias) + if child_path in by_path: + raise TreeError(f"Duplicate path: {child_path}") + tool_meta = apply_tool_overrides(tool, source.tool_overrides.get(tool_name)) + summary = tool_meta.get("_broker_summary") or tool_meta.get("title") or tool_name + description = tool_meta.get("description", "") entry = ToolEntry( - path=path, + path=child_path, type="tool", summary=summary, description=description, - backend_ref=backend_ref, - tool_name=node.get("tool_name", ""), + backend_key=backend_key, + tool_name=tool_name, + 
tool_meta=tool_meta, ) - if not entry.tool_name: - raise TreeError(f"Missing tool_name for {path}") - by_path[path] = entry - return entry - raise TreeError(f"Invalid entry type for {path}: {entry_type!r}") + add_entry(entry) + entries.append(entry) + return entries - root = parse(raw) - if not isinstance(root, NodeEntry) or root.path != "/": - raise TreeError("Tree root must be a node at /") - return EntryIndex(root=root, by_path=by_path) + for child_cfg in root_cfg.children: + root.children.append(parse_node(child_cfg)) + if root_cfg.source: + root.children.extend(_generate_source_children(root, root_cfg.source)) + + return EntryIndex(root=root, by_path=by_path), sources diff --git a/pyproject.toml b/pyproject.toml index 994dc2e..9280710 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "pyMCPBroker" -version = "0.1.0" -description = "Small FastAPI MCP broker exposing stable meta-tools over stdio MCP backends" +version = "0.2.1" +description = "Small FastAPI MCP broker exposing stable meta-tools over stdio MCP sources" readme = "README.md" requires-python = ">=3.10" dependencies = [ diff --git a/smoke_config.json b/smoke_config.json index 6eeeb34..32112fc 100644 --- a/smoke_config.json +++ b/smoke_config.json @@ -1 +1,9 @@ -{"backends": {"gitea": {"backend": "stdio", "command": "/opt/pyvenv/bin/python /mnt/data/pyMCPBroker_project/tests/fake_mcp_server.py", "tool_filter": ["get_*", "!delete_*"], "tool_overrides": {}}}, "tree": {"path": "/", "type": "node", "children": [{"path": "/repo", "type": "node", "children": [{"path": "/repo/read/get_file", "type": "tool", "backend_ref": "gitea", "tool_name": "get_file_contents"}]}]}} \ No newline at end of file +{ + "tree": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations" + } + ] +} diff --git a/tests/test_broker_end_to_end.py b/tests/test_broker_end_to_end.py index 71bb856..977bd3d 100644 --- 
a/tests/test_broker_end_to_end.py +++ b/tests/test_broker_end_to_end.py @@ -12,57 +12,43 @@ from pyMCPBroker.broker import Broker from pyMCPBroker.config import load_config + def make_config(tmp_path: Path) -> Path: server = Path(__file__).with_name("fake_mcp_server.py") command = f"{shlex.quote(sys.executable)} {shlex.quote(str(server))}" config = { - "backends": { - "gitea": { - "backend": "stdio", - "command": command, - "tool_filter": ["get_*", "!delete_*"], - "tool_overrides": { - "get_file_contents": { - "summary": "Read one file from a repository", - "max_output_chars": 1200, - "example_args": { - "owner": "myorg", - "repo": "demo-repo", - "ref": "main", - "filePath": "README.md", + "tree": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations", + "children": [ + { + "path": "/repo/read", + "type": "node", + "summary": "Read repository data", + "source": { + "backend": "stdio", + "command": command, + "tool_filter": ["get_*", "!delete_*"], + "path_aliases": {"get_file_contents": "get_file"}, + "tool_overrides": { + "get_file_contents": { + "summary": "Read one file from a repository", + "max_output_chars": 1200, + "example_args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md", + }, + } + }, }, } - }, + ], } - }, - "tree": { - "path": "/", - "type": "node", - "summary": "Root", - "children": [ - { - "path": "/repo", - "type": "node", - "summary": "Repository operations", - "children": [ - { - "path": "/repo/read", - "type": "node", - "summary": "Read repository data", - "children": [ - { - "path": "/repo/read/get_file", - "type": "tool", - "summary": "Read one file", - "backend_ref": "gitea", - "tool_name": "get_file_contents", - } - ], - } - ], - } - ], - }, + ] } path = tmp_path / "config.json" path.write_text(json.dumps(config), encoding="utf-8") @@ -79,6 +65,10 @@ def test_meta_end_to_end(tmp_path: Path) -> None: assert r.status_code == 200 assert r.json()["children"][0]["path"] == "/repo" 
+ r = client.post("/meta_tree", json={"path": "/repo/read"}) + assert r.status_code == 200 + assert r.json()["children"][0]["path"] == "/repo/read/get_file" + r = client.post("/meta_desc", json={"path": "/repo/read/get_file"}) body = r.json() assert body["summary"] == "Read one file from a repository" @@ -130,3 +120,60 @@ def test_secret_auth(tmp_path: Path) -> None: with TestClient(app) as client: assert client.post("/meta_tree", json={"path": "/"}).status_code == 401 assert client.post("/meta_tree", json={"path": "/"}, headers={"Authorization": "Bearer sekret"}).status_code == 200 + + +def test_no_filter_exposes_all_tools(tmp_path: Path) -> None: + server = Path(__file__).with_name("fake_mcp_server.py") + command = f"{shlex.quote(sys.executable)} {shlex.quote(str(server))}" + path = tmp_path / "config.json" + path.write_text( + json.dumps( + { + "tree": [ + { + "path": "/repo", + "type": "node", + "source": {"backend": "stdio", "command": command}, + } + ] + } + ), + encoding="utf-8", + ) + cfg = load_config(path) + broker = Broker(cfg) + app = create_app(broker) + + with TestClient(app) as client: + r = client.post("/meta_tree", json={"path": "/repo"}) + body = r.json() + child_paths = {child["path"] for child in body["children"]} + assert "/repo/get_file_contents" in child_paths + assert "/repo/delete_file" in child_paths + + +def test_explicit_root_is_optional(tmp_path: Path) -> None: + path = tmp_path / "config.json" + path.write_text( + json.dumps( + { + "tree": { + "path": "/", + "type": "node", + "summary": "Configured root", + "children": [ + {"path": "/repo", "type": "node", "summary": "Repository operations"} + ], + } + } + ), + encoding="utf-8", + ) + cfg = load_config(path) + broker = Broker(cfg) + app = create_app(broker) + + with TestClient(app) as client: + r = client.post("/meta_tree", json={"path": "/"}) + assert r.status_code == 200 + assert r.json()["children"][0]["path"] == "/repo" diff --git a/tests/test_config.py b/tests/test_config.py index 
67979a7..5e86d6c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,13 +11,64 @@ from pyMCPBroker.config import load_config def test_env_substitution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("BROKER_CMD", "python fake.py") path = tmp_path / "config.json" - path.write_text(json.dumps({"backends": {"x": {"backend": "stdio", "command": "${BROKER_CMD}"}}, "tree": {"path": "/", "type": "node", "children": []}}), encoding="utf-8") + path.write_text( + json.dumps( + { + "tree": [ + { + "path": "/repo", + "type": "node", + "source": {"backend": "stdio", "command": "${BROKER_CMD}"}, + } + ] + } + ), + encoding="utf-8", + ) cfg = load_config(path) - assert cfg.backends["x"].command == "python fake.py" + assert cfg.tree[0].source is not None + assert cfg.tree[0].source.command == "python fake.py" def test_env_missing_raises(tmp_path: Path) -> None: path = tmp_path / "config.json" - path.write_text(json.dumps({"backends": {"x": {"backend": "stdio", "command": "${MISSING_VAR}"}}, "tree": {"path": "/", "type": "node", "children": []}}), encoding="utf-8") + path.write_text( + json.dumps( + { + "tree": [ + { + "path": "/repo", + "type": "node", + "source": {"backend": "stdio", "command": "${MISSING_VAR}"}, + } + ] + } + ), + encoding="utf-8", + ) with pytest.raises(ValueError): load_config(path) + + +def test_invalid_path_alias_raises(tmp_path: Path) -> None: + path = tmp_path / "config.json" + path.write_text( + json.dumps( + { + "tree": [ + { + "path": "/repo", + "type": "node", + "source": { + "backend": "stdio", + "command": "echo test", + "path_aliases": {"get_file_contents": "bad/name"}, + }, + } + ] + } + ), + encoding="utf-8", + ) + with pytest.raises(Exception): + load_config(path) diff --git a/tests/test_filters.py b/tests/test_filters.py index 7ac4bb3..4c1ebc8 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -8,3 +8,9 @@ def test_filter_allow_then_deny() -> None: assert not 
is_allowed("get_secret_token", patterns) assert not is_allowed("delete_file", patterns) assert filter_names(["get_file", "get_secret_token", "delete_file"], patterns) == ["get_file"] + + +def test_filter_deny_only_starts_from_all_allowed() -> None: + patterns = ["!delete_*"] + assert is_allowed("get_file", patterns) + assert not is_allowed("delete_file", patterns)