diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9b38853 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/README.md b/README.md index d511125..dae0c5b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,38 @@ # pyMCPBroker +Small FastAPI broker exposing three stable meta-tools over MCP backends: + +- `meta_tree` +- `meta_desc` +- `meta_call` + +## Scope + +Current implementation: + +- MCP `stdio` backend only +- persistent subprocess per backend +- `initialize` + `notifications/initialized` +- paginated `tools/list` +- `tools/call` +- allow/deny wildcard filters +- backend overrides +- broker-side JSON Schema validation +- compact structured error payloads +- result truncation with shape-preserving best effort +- optional shared secret (`Authorization: Bearer ` or `X-Api-Key`) + +## Run + +```bash +pip install . +python -m pyMCPBroker 0.0.0.0:8100 /config.json +``` + +Optional shared secret: + +```bash +python -m pyMCPBroker 0.0.0.0:8100 /config.json mysecret +``` + +Example config: `examples/config.example.json` diff --git a/examples/config.example.json b/examples/config.example.json new file mode 100644 index 0000000..112b164 --- /dev/null +++ b/examples/config.example.json @@ -0,0 +1,49 @@ +{ + "backends": { + "gitea": { + "backend": "stdio", + "command": "/opt/gitea-mcp/gitea-mcp --host ${GITEA_URL} --token ${GITEA_TOKEN}", + "tool_filter": ["get_*", "list_*", "search_*", "!delete_*", "!create_*"], + "tool_overrides": { + "get_file_contents": { + "summary": "Read one file from a repository", + "max_output_chars": 12000, + "example_args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md" + } + } + } + } + }, + "tree": { + "path": "/", + "type": "node", + "summary": "Root", + "children": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations", + "children": [ + { + "path": "/repo/read", + "type": "node", + "summary": "Read repository data", + "children": [ + { + "path": "/repo/read/get_file", + "type": "tool", + "summary": "Read one file", + "backend_ref": "gitea", + "tool_name": "get_file_contents" + } + ] + } + ] + } + ] + } +} diff --git a/pyMCPBroker/__init__.py b/pyMCPBroker/__init__.py new file mode 100644 index 0000000..07c5de9 --- /dev/null +++ b/pyMCPBroker/__init__.py @@ -0,0 +1,2 @@ +__all__ = ["__version__"] +__version__ = "0.1.0" diff --git a/pyMCPBroker/__main__.py b/pyMCPBroker/__main__.py new file mode 100644 index 0000000..51d8b48 --- /dev/null +++ b/pyMCPBroker/__main__.py @@ -0,0 +1,4 @@ +from .main import main + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyMCPBroker/app.py b/pyMCPBroker/app.py new file mode 100644 index 0000000..ffe2b4b --- /dev/null +++ b/pyMCPBroker/app.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from contextlib import asynccontextmanager + +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse + +from .broker import Broker, BrokerError +from .models import MetaCallRequest, MetaDescRequest, MetaTreeRequest + + +def create_app(broker: Broker, shared_secret: str | None = None) -> FastAPI: + @asynccontextmanager + async def lifespan(_: FastAPI): + broker.startup() + try: + yield + finally: + broker.shutdown() + + app = FastAPI( + title="pyMCPBroker", + version="0.1.0", + description=( + "Expose three stable meta-tools over MCP backends. " + "Use meta_tree to navigate paths, meta_desc to inspect a path, and meta_call to execute a tool path." + ), + lifespan=lifespan, + ) + + @app.middleware("http") + async def auth_middleware(request: Request, call_next): # type: ignore[override] + if shared_secret: + auth = request.headers.get("authorization", "") + api_key = request.headers.get("x-api-key", "") + if auth != f"Bearer {shared_secret}" and api_key != shared_secret: + return JSONResponse({"ok": False, "error_code": "unauthorized", "message": "Missing or invalid secret"}, status_code=401) + return await call_next(request) + + @app.exception_handler(BrokerError) + async def broker_error_handler(_: Request, exc: BrokerError) -> JSONResponse: + status = 400 + if exc.error_code in {"backend_unavailable", "backend_call_failed", "missing_backend_tool"}: + status = 502 + return JSONResponse(exc.as_payload(), status_code=status) + + @app.get("/") + async def root() -> dict[str, object]: + return {"ok": True, "service": "pyMCPBroker", "meta_tools": ["meta_tree", "meta_desc", "meta_call"]} + + @app.post( + "/meta_tree", + summary="meta_tree", + description="Meta-tool. Use this first to discover available paths. Call it on a node path to list its direct children.", + ) + async def meta_tree(request: MetaTreeRequest) -> dict[str, object]: + return broker.meta_tree(request.path) + + @app.post( + "/meta_desc", + summary="meta_desc", + description="Meta-tool. Use this to inspect a path. For a tool path, it returns the exact argument schema to use before calling it.", + ) + async def meta_desc(request: MetaDescRequest) -> dict[str, object]: + return broker.meta_desc(request.path) + + @app.post( + "/meta_call", + summary="meta_call", + description="Meta-tool. Calls a tool path. The args object must match the schema previously returned by meta_desc.", + ) + async def meta_call(request: MetaCallRequest) -> dict[str, object]: + return broker.meta_call(request.path, request.args) + + return app diff --git a/pyMCPBroker/backend_stdio.py b/pyMCPBroker/backend_stdio.py new file mode 100644 index 0000000..055fa58 --- /dev/null +++ b/pyMCPBroker/backend_stdio.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +import json +import logging +import subprocess +import threading +import time +from dataclasses import dataclass, field +from queue import Queue +from typing import Any + +logger = logging.getLogger(__name__) + +MCP_PROTOCOL_VERSION = "2025-06-18" + + +class MCPError(RuntimeError): + pass + + +@dataclass(slots=True) +class _Pending: + queue: Queue[dict[str, Any]] = field(default_factory=Queue) + + +class MCPStdioBackend: + def __init__(self, name: str, command: str) -> None: + self.name = name + self.command = command + self.process: subprocess.Popen[str] | None = None + self._send_lock = threading.Lock() + self._state_lock = threading.Lock() + self._pending: dict[int, _Pending] = {} + self._next_id = 1 + self._reader: threading.Thread | None = None + self._stderr_reader: threading.Thread | None = None + self.server_info: dict[str, Any] = {} + self.capabilities: dict[str, Any] = {} + + def start(self) -> None: + if self.process is not None: + return + self.process = subprocess.Popen( + self.command, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding="utf-8", + bufsize=1, + ) + if not self.process.stdin or not self.process.stdout or not self.process.stderr: + raise MCPError(f"Backend {self.name} failed to open stdio pipes") + self._reader = threading.Thread(target=self._read_stdout, name=f"{self.name}-stdout", daemon=True) + self._reader.start() + self._stderr_reader = threading.Thread(target=self._read_stderr, name=f"{self.name}-stderr", daemon=True) + self._stderr_reader.start() + init = self.request( + "initialize", + { + "protocolVersion": MCP_PROTOCOL_VERSION, + "capabilities": {}, + "clientInfo": {"name": "pyMCPBroker", "version": "0.1.0"}, + }, + timeout=10, + ) + self.capabilities = init.get("capabilities", {}) + self.server_info = init.get("serverInfo", {}) + self.notify("notifications/initialized", {}) + + def close(self) -> None: + process = self.process + self.process = None + if process is None: + return + try: + if process.stdin: + process.stdin.close() + except Exception: + pass + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=2) + except subprocess.TimeoutExpired: + process.kill() + + def _read_stderr(self) -> None: + assert self.process and self.process.stderr + for line in self.process.stderr: + text = line.rstrip("\n") + if text: + logger.debug("[%s stderr] %s", self.name, text) + + def _fail_pending(self, message: str) -> None: + with self._state_lock: + pending = list(self._pending.values()) + self._pending.clear() + for item in pending: + item.queue.put({"jsonrpc": "2.0", "error": {"code": -32000, "message": message}}) + + def _read_stdout(self) -> None: + assert self.process and self.process.stdout + try: + for line in self.process.stdout: + raw = line.strip() + if not raw: + continue + try: + message = json.loads(raw) + except json.JSONDecodeError as exc: + logger.warning("[%s] ignoring invalid JSON line: %s", self.name, exc) + continue + response_id = message.get("id") + if response_id is None: + logger.debug("[%s notify] %s", self.name, message) + continue + with self._state_lock: + pending = self._pending.get(int(response_id)) + if pending: + pending.queue.put(message) + finally: + self._fail_pending(f"Backend {self.name} closed stdout") + + def _next_request_id(self) -> int: + with self._state_lock: + request_id = self._next_id + self._next_id += 1 + self._pending[request_id] = _Pending() + return request_id + + def _pop_pending(self, request_id: int) -> _Pending | None: + with self._state_lock: + return self._pending.pop(request_id, None) + + def _send(self, payload: dict[str, Any]) -> None: + if self.process is None or self.process.stdin is None: + raise MCPError(f"Backend {self.name} is not running") + with self._send_lock: + if self.process.poll() is not None: + raise MCPError(f"Backend {self.name} exited with code {self.process.returncode}") + self.process.stdin.write(json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + "\n") + self.process.stdin.flush() + + def notify(self, method: str, params: dict[str, Any] | None = None) -> None: + payload: dict[str, Any] = {"jsonrpc": "2.0", "method": method} + if params: + payload["params"] = params + self._send(payload) + + def request(self, method: str, params: dict[str, Any] | None = None, timeout: float = 30) -> dict[str, Any]: + request_id = self._next_request_id() + try: + payload: dict[str, Any] = {"jsonrpc": "2.0", "id": request_id, "method": method} + if params is not None: + payload["params"] = params + self._send(payload) + pending = self._pending[request_id] + response = pending.queue.get(timeout=timeout) + except Exception: + self._pop_pending(request_id) + raise + self._pop_pending(request_id) + if "error" in response: + error = response["error"] + raise MCPError(f"{method} failed: {error.get('message', 'unknown error')}") + return response.get("result", {}) + + def list_tools(self, timeout: float = 30) -> list[dict[str, Any]]: + tools: list[dict[str, Any]] = [] + cursor: str | None = None + while True: + params: dict[str, Any] = {} + if cursor: + params["cursor"] = cursor + result = self.request("tools/list", params=params, timeout=timeout) + tools.extend(result.get("tools", [])) + cursor = result.get("nextCursor") + if not cursor: + return tools + + def call_tool(self, tool_name: str, arguments: dict[str, Any], timeout: float = 30) -> dict[str, Any]: + return self.request( + "tools/call", + params={"name": tool_name, "arguments": arguments}, + timeout=timeout, + ) diff --git a/pyMCPBroker/broker.py b/pyMCPBroker/broker.py new file mode 100644 index 0000000..baf6d8f --- /dev/null +++ b/pyMCPBroker/broker.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Any + +from jsonschema import Draft202012Validator +from jsonschema.exceptions import ValidationError + +from .backend_stdio import MCPError, MCPStdioBackend +from .filters import is_allowed +from .models import BackendConfig, BackendOverride, NodeEntry, RootConfig, ToolEntry +from .overrides import apply_tool_overrides +from .render import normalize_result +from .tree import EntryIndex, TreeError, build_tree, normalize_path + +logger = logging.getLogger(__name__) + + +@dataclass(slots=True) +class BrokerError(Exception): + error_code: str + message: str + extra: dict[str, Any] | None = None + + def as_payload(self) -> dict[str, Any]: + payload = {"ok": False, "error_code": self.error_code, "message": self.message} + if self.extra: + payload.update(self.extra) + return payload + + +class Broker: + def __init__(self, config: RootConfig, ignore_broken_tool: bool = False) -> None: + self.config = config + self.ignore_broken_tool = ignore_broken_tool + self.index: EntryIndex = build_tree(config.tree, set(config.backends)) + self.backends: dict[str, MCPStdioBackend] = {} + self.tools_by_backend: dict[str, dict[str, dict[str, Any]]] = {} + self.broken_backends: dict[str, str] = {} + + def startup(self) -> None: + for name, backend_cfg in self.config.backends.items(): + try: + backend = self._start_backend(name, backend_cfg) + self.backends[name] = backend + except Exception as exc: + if not self.ignore_broken_tool: + raise + self.broken_backends[name] = str(exc) + logger.warning("Skipping broken backend %s: %s", name, exc) + self._validate_tree_links() + + def shutdown(self) -> None: + for backend in self.backends.values(): + backend.close() + self.backends.clear() + + def _start_backend(self, name: str, backend_cfg: BackendConfig) -> MCPStdioBackend: + backend = MCPStdioBackend(name=name, command=backend_cfg.command) + backend.start() + raw_tools = backend.list_tools(timeout=30) + filtered: dict[str, dict[str, Any]] = {} + for tool in raw_tools: + tool_name = tool.get("name") + if not tool_name or not is_allowed(tool_name, backend_cfg.tool_filter): + continue + override = backend_cfg.tool_overrides.get(tool_name) + filtered[tool_name] = apply_tool_overrides(tool, override) + self.tools_by_backend[name] = filtered + return backend + + def _validate_tree_links(self) -> None: + for path, entry in self.index.by_path.items(): + if not isinstance(entry, ToolEntry): + continue + if entry.backend_ref in self.broken_backends: + continue + tools = self.tools_by_backend.get(entry.backend_ref, {}) + if entry.tool_name not in tools: + raise BrokerError( + "missing_backend_tool", + f"Tree path {path} targets missing or filtered tool {entry.tool_name!r}", + ) + + def _resolve_entry(self, path: str) -> NodeEntry | ToolEntry: + try: + entry = self.index.get(path) + except TreeError as exc: + raise BrokerError("unknown_path", str(exc)) from exc + if isinstance(entry, NodeEntry) or isinstance(entry, ToolEntry): + return entry + raise BrokerError("internal_error", f"Unsupported entry type for {path}") + + def _resolve_tool(self, path: str) -> tuple[ToolEntry, dict[str, Any], BackendConfig, MCPStdioBackend]: + entry = self._resolve_entry(path) + if not isinstance(entry, ToolEntry): + raise BrokerError("not_a_tool", f"Path is not a tool: {normalize_path(path)}") + if entry.backend_ref in self.broken_backends: + raise BrokerError( + "backend_unavailable", + f"Backend {entry.backend_ref!r} is unavailable: {self.broken_backends[entry.backend_ref]}", + ) + tool = self.tools_by_backend.get(entry.backend_ref, {}).get(entry.tool_name) + backend_cfg = self.config.backends[entry.backend_ref] + backend = self.backends[entry.backend_ref] + if tool is None: + raise BrokerError("missing_backend_tool", f"Tool is not available: {entry.tool_name}") + return entry, tool, backend_cfg, backend + + def meta_tree(self, path: str) -> dict[str, Any]: + entry = self._resolve_entry(path) + if not isinstance(entry, NodeEntry): + raise BrokerError("not_a_node", f"Path is not a node: {normalize_path(path)}") + children = [ + {"path": child.path, "type": child.type, "summary": child.summary} + for child in entry.children + ] + return { + "ok": True, + "path": entry.path, + "type": "node", + "children": children, + "usage_hint": "Use meta_desc on a leaf path before meta_call.", + } + + def meta_desc(self, path: str) -> dict[str, Any]: + entry = self._resolve_entry(path) + if isinstance(entry, NodeEntry): + payload = { + "ok": True, + "path": entry.path, + "type": "node", + "summary": entry.summary, + "description": entry.description, + "usage_hint": "Use meta_tree to navigate child paths.", + } + if entry.children: + payload["children"] = [ + {"path": child.path, "type": child.type, "summary": child.summary} + for child in entry.children + ] + return payload + + tool_entry, tool, _, _ = self._resolve_tool(path) + args_schema = tool.get("inputSchema") or {"type": "object", "properties": {}} + summary = tool.get("_broker_summary") or tool_entry.summary or tool.get("title") or tool_entry.tool_name + description = tool_entry.description or tool.get("description", "") + payload = { + "ok": True, + "path": tool_entry.path, + "type": "tool", + "summary": summary, + "description": description, + "args_schema": args_schema, + "usage_hint": "Call meta_call with this path and args matching args_schema.", + } + example_args = tool.get("_broker_example_args") + if example_args is not None: + payload["example_args"] = example_args + return payload + + def meta_call(self, path: str, args: dict[str, Any]) -> dict[str, Any]: + tool_entry, tool, _, backend = self._resolve_tool(path) + schema = tool.get("inputSchema") or {"type": "object", "properties": {}} + try: + Draft202012Validator(schema).validate(args) + except ValidationError as exc: + raise BrokerError( + "invalid_arguments", + exc.message, + { + "path": tool_entry.path, + "required": list(schema.get("required", [])), + "usage_hint": "Call meta_desc on the same path before retrying.", + }, + ) from exc + timeout = float(tool.get("_broker_timeout") or 30) + max_output_chars = tool.get("_broker_max_output_chars") + try: + result = backend.call_tool(tool_entry.tool_name, args, timeout=timeout) + except MCPError as exc: + raise BrokerError("backend_call_failed", str(exc), {"path": tool_entry.path}) from exc + return { + "ok": True, + "path": tool_entry.path, + "result": normalize_result(result, max_output_chars=max_output_chars), + } diff --git a/pyMCPBroker/config.py b/pyMCPBroker/config.py new file mode 100644 index 0000000..56ac223 --- /dev/null +++ b/pyMCPBroker/config.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import json +import os +import re +from pathlib import Path +from typing import Any + +from .models import RootConfig + +_ENV_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}") + + +def _expand_env(value: str) -> str: + def repl(match: re.Match[str]) -> str: + name = match.group(1) + if name not in os.environ: + raise ValueError(f"Missing environment variable: {name}") + return os.environ[name] + + return _ENV_RE.sub(repl, value) + + +def _expand(obj: Any) -> Any: + if isinstance(obj, str): + return _expand_env(obj) + if isinstance(obj, list): + return [_expand(item) for item in obj] + if isinstance(obj, dict): + return {key: _expand(value) for key, value in obj.items()} + return obj + + +def load_config(path: str | Path) -> RootConfig: + raw = json.loads(Path(path).read_text(encoding="utf-8")) + return RootConfig.model_validate(_expand(raw)) diff --git a/pyMCPBroker/filters.py b/pyMCPBroker/filters.py new file mode 100644 index 0000000..193139e --- /dev/null +++ b/pyMCPBroker/filters.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from fnmatch import fnmatchcase +from typing import Iterable + + +def _split_patterns(patterns: list[str]) -> tuple[list[str], list[str]]: + allow: list[str] = [] + deny: list[str] = [] + for pattern in patterns: + if pattern.startswith("!"): + deny.append(pattern[1:]) + else: + allow.append(pattern) + return allow, deny + + +def is_allowed(name: str, patterns: list[str]) -> bool: + allow, deny = _split_patterns(patterns) + if allow and not any(fnmatchcase(name, pattern) for pattern in allow): + return False + if any(fnmatchcase(name, pattern) for pattern in deny): + return False + return True + + +def filter_names(names: Iterable[str], patterns: list[str]) -> list[str]: + return [name for name in names if is_allowed(name, patterns)] diff --git a/pyMCPBroker/main.py b/pyMCPBroker/main.py new file mode 100644 index 0000000..b4a1e1f --- /dev/null +++ b/pyMCPBroker/main.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import argparse +import json +import logging +from pathlib import Path + +import uvicorn + +from .app import create_app +from .broker import Broker +from .config import load_config + + + +def _parse_bind(value: str) -> tuple[str, int]: + if ":" not in value: + raise argparse.ArgumentTypeError("Bind address must be HOST:PORT") + host, port = value.rsplit(":", 1) + if not host: + raise argparse.ArgumentTypeError("Missing host") + return host, int(port) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="pyMCPBroker") + parser.add_argument("bind") + parser.add_argument("config_path") + parser.add_argument("shared_secret", nargs="?") + parser.add_argument("--reload", action="store_true") + parser.add_argument("--ignore-broken-tool", action="store_true") + parser.add_argument("--log-level", default="info") + parser.add_argument("--dump-tree", action="store_true") + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + logging.basicConfig(level=getattr(logging, str(args.log_level).upper(), logging.INFO)) + host, port = _parse_bind(args.bind) + config = load_config(args.config_path) + broker = Broker(config, ignore_broken_tool=args.ignore_broken_tool) + if args.dump_tree: + print(json.dumps(config.tree, indent=2, ensure_ascii=False)) + return 0 + app = create_app(broker, shared_secret=args.shared_secret) + uvicorn.run(app, host=host, port=port, reload=args.reload, log_level=str(args.log_level).lower()) + return 0 diff --git a/pyMCPBroker/models.py b/pyMCPBroker/models.py new file mode 100644 index 0000000..f3ba92d --- /dev/null +++ b/pyMCPBroker/models.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +from pydantic import BaseModel, Field + + +class MetaTreeRequest(BaseModel): + path: str = "/" + + +class MetaDescRequest(BaseModel): + path: str + + +class MetaCallRequest(BaseModel): + path: str + args: dict[str, Any] = Field(default_factory=dict) + + +class BackendOverride(BaseModel): + summary: str | None = None + description: str | None = None + max_output_chars: int | None = None + timeout: float | None = None + example_args: dict[str, Any] | None = None + render_mode: str | None = None + + +class BackendConfig(BaseModel): + backend: Literal["stdio"] + command: str + tool_filter: list[str] = Field(default_factory=list) + tool_overrides: dict[str, BackendOverride] = Field(default_factory=dict) + + +class RootConfig(BaseModel): + backends: dict[str, BackendConfig] + tree: dict[str, Any] + + +@dataclass(slots=True) +class Entry: + path: str + type: Literal["node", "tool"] + summary: str = "" + description: str = "" + + +@dataclass(slots=True) +class NodeEntry(Entry): + children: list[Entry] = field(default_factory=list) + + +@dataclass(slots=True) +class ToolEntry(Entry): + backend_ref: str = "" + tool_name: str = "" diff --git a/pyMCPBroker/overrides.py b/pyMCPBroker/overrides.py new file mode 100644 index 0000000..f68b9cb --- /dev/null +++ b/pyMCPBroker/overrides.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any + +from .models import BackendOverride + + +def apply_tool_overrides(tool: dict[str, Any], override: BackendOverride | None) -> dict[str, Any]: + merged = dict(tool) + if not override: + return merged + if override.summary is not None: + merged["_broker_summary"] = override.summary + if override.description is not None: + merged["description"] = override.description + if override.example_args is not None: + merged["_broker_example_args"] = override.example_args + if override.max_output_chars is not None: + merged["_broker_max_output_chars"] = override.max_output_chars + if override.timeout is not None: + merged["_broker_timeout"] = override.timeout + if override.render_mode is not None: + merged["_broker_render_mode"] = override.render_mode + return merged diff --git a/pyMCPBroker/render.py b/pyMCPBroker/render.py new file mode 100644 index 0000000..d6ce95b --- /dev/null +++ b/pyMCPBroker/render.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import json +from copy import deepcopy +from typing import Any + + +def _json_len(value: Any) -> int: + return len(json.dumps(value, ensure_ascii=False, separators=(",", ":"))) + + +def _truncate_strings(value: Any, cap: int) -> Any: + if isinstance(value, str): + if len(value) <= cap: + return value + if cap <= 3: + return value[:cap] + return value[: cap - 3] + "..." + if isinstance(value, list): + return [_truncate_strings(item, cap) for item in value] + if isinstance(value, dict): + return {key: _truncate_strings(item, cap) for key, item in value.items()} + return value + + +def _compact_lists(value: Any, max_items: int) -> Any: + if isinstance(value, list): + items = [_compact_lists(item, max_items) for item in value[:max_items]] + if len(value) > max_items: + items.append({"_truncated_items": len(value) - max_items}) + return items + if isinstance(value, dict): + return {key: _compact_lists(item, max_items) for key, item in value.items()} + return value + + +def normalize_result(result: Any, max_output_chars: int | None) -> Any: + if not max_output_chars or _json_len(result) <= max_output_chars: + return result + + current = deepcopy(result) + for cap in (4096, 2048, 1024, 512, 256, 128, 64): + current = _truncate_strings(current, cap) + if _json_len(current) <= max_output_chars: + return {"truncated": True, "max_output_chars": max_output_chars, "result": current} + + current = _compact_lists(current, 3) + for cap in (64, 32, 16): + current = _truncate_strings(current, cap) + if _json_len(current) <= max_output_chars: + return {"truncated": True, "max_output_chars": max_output_chars, "result": current} + + preview = json.dumps(result, ensure_ascii=False)[: max(0, max_output_chars - 32)] + return { + "truncated": True, + "max_output_chars": max_output_chars, + "preview": preview + ("..." if len(preview) < _json_len(result) else ""), + } diff --git a/pyMCPBroker/tree.py b/pyMCPBroker/tree.py new file mode 100644 index 0000000..798250f --- /dev/null +++ b/pyMCPBroker/tree.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from typing import Any + +from .models import Entry, NodeEntry, ToolEntry + + +class TreeError(ValueError): + pass + + +class EntryIndex: + def __init__(self, root: NodeEntry, by_path: dict[str, Entry]) -> None: + self.root = root + self.by_path = by_path + + def get(self, path: str) -> Entry: + normalized = normalize_path(path) + try: + return self.by_path[normalized] + except KeyError as exc: + raise TreeError(f"Unknown path: {normalized}") from exc + + +def normalize_path(path: str) -> str: + if not path: + raise TreeError("Path must not be empty") + if not path.startswith("/"): + raise TreeError(f"Path must be absolute: {path}") + if path != "/" and path.endswith("/"): + path = path[:-1] + if "//" in path: + raise TreeError(f"Invalid path: {path}") + return path + + +def build_tree(raw: dict[str, Any], known_backends: set[str]) -> EntryIndex: + by_path: dict[str, Entry] = {} + + def parse(node: dict[str, Any]) -> Entry: + entry_type = node.get("type") + path = normalize_path(node["path"]) + if path in by_path: + raise TreeError(f"Duplicate path: {path}") + summary = node.get("summary", "") + description = node.get("description", "") + if entry_type == "node": + entry = NodeEntry(path=path, type="node", summary=summary, description=description) + by_path[path] = entry + entry.children = [parse(child) for child in node.get("children", [])] + return entry + if entry_type == "tool": + backend_ref = node.get("backend_ref", "") + if backend_ref not in known_backends: + raise TreeError(f"Unknown backend_ref {backend_ref!r} for {path}") + entry = ToolEntry( + path=path, + type="tool", + summary=summary, + description=description, + backend_ref=backend_ref, + tool_name=node.get("tool_name", ""), + ) + if not entry.tool_name: + raise TreeError(f"Missing tool_name for {path}") + by_path[path] = entry + return entry + raise TreeError(f"Invalid entry type for {path}: {entry_type!r}") + + root = parse(raw) + if not isinstance(root, NodeEntry) or root.path != "/": + raise TreeError("Tree root must be a node at /") + return EntryIndex(root=root, by_path=by_path) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..994dc2e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyMCPBroker" +version = "0.1.0" +description = "Small FastAPI MCP broker exposing stable meta-tools over stdio MCP backends" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "fastapi>=0.111,<1", + "uvicorn>=0.30,<1", + "pydantic>=2.7,<3", + "jsonschema>=4.21,<5", +] + +[project.optional-dependencies] +test = [ + "pytest>=8,<10", + "httpx>=0.27,<1", +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = ["pyMCPBroker*"] + +[tool.pytest.ini_options] +addopts = "-q" +testpaths = ["tests"] diff --git a/smoke_config.json b/smoke_config.json new file mode 100644 index 0000000..6eeeb34 --- /dev/null +++ b/smoke_config.json @@ -0,0 +1 @@ +{"backends": {"gitea": {"backend": "stdio", "command": "/opt/pyvenv/bin/python /mnt/data/pyMCPBroker_project/tests/fake_mcp_server.py", "tool_filter": ["get_*", "!delete_*"], "tool_overrides": {}}}, "tree": {"path": "/", "type": "node", "children": [{"path": "/repo", "type": "node", "children": [{"path": "/repo/read/get_file", "type": "tool", "backend_ref": "gitea", "tool_name": "get_file_contents"}]}]}} \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..f96b4d8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) diff --git a/tests/fake_mcp_server.py b/tests/fake_mcp_server.py new file mode 100644 index 0000000..22eaa62 --- /dev/null +++ b/tests/fake_mcp_server.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import json +import sys + +TOOLS = [ + { + "name": "get_file_contents", + "title": "Get file contents", + "description": "Read file contents at a given ref.", + "inputSchema": { + "type": "object", + "properties": { + "owner": {"type": "string"}, + "repo": {"type": "string"}, + "ref": {"type": "string"}, + "filePath": {"type": "string"}, + "withLines": {"type": "boolean"}, + }, + "required": ["owner", "repo", "ref", "filePath"], + "additionalProperties": False, + }, + }, + { + "name": "delete_file", + "title": "Delete file", + "description": "Dangerous write tool.", + "inputSchema": { + "type": "object", + "properties": {"filePath": {"type": "string"}}, + "required": ["filePath"], + }, + }, +] + + +def send(message: dict) -> None: + sys.stdout.write(json.dumps(message, separators=(",", ":")) + "\n") + sys.stdout.flush() + + +for line in sys.stdin: + raw = line.strip() + if not raw: + continue + msg = json.loads(raw) + method = msg.get("method") + msg_id = msg.get("id") + + if method == "initialize": + send( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": { + "protocolVersion": msg["params"]["protocolVersion"], + "capabilities": {"tools": {"listChanged": False}}, + "serverInfo": {"name": "fake-mcp", "version": "1.0"}, + }, + } + ) + continue + + if method == "notifications/initialized": + continue + + if method == "tools/list": + cursor = (msg.get("params") or {}).get("cursor") + if not cursor: + send( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": {"tools": [TOOLS[0]], "nextCursor": "page-2"}, + } + ) + else: + send( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": {"tools": [TOOLS[1]]}, + } + ) + continue + + if method == "tools/call": + params = msg.get("params") or {} + name = params["name"] + arguments = params.get("arguments") or {} + if name == "get_file_contents": + payload = { + "content": [ + { + "type": "text", + "text": "README line\n" * 3000, + } + ], + "structuredContent": { + "owner": arguments.get("owner"), + "repo": arguments.get("repo"), + "ref": arguments.get("ref"), + "filePath": arguments.get("filePath"), + }, + "isError": False, + } + send({"jsonrpc": "2.0", "id": msg_id, "result": payload}) + continue + send({"jsonrpc": "2.0", "id": msg_id, "error": {"code": -32601, "message": f"Unknown tool: {name}"}}) + continue + + send({"jsonrpc": "2.0", "id": msg_id, "error": {"code": -32601, "message": f"Unknown method: {method}"}}) diff --git a/tests/test_broker_end_to_end.py b/tests/test_broker_end_to_end.py new file mode 100644 index 0000000..71bb856 --- /dev/null +++ b/tests/test_broker_end_to_end.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import json +import shlex +import sys +from pathlib import Path + +from fastapi.testclient import TestClient + +from pyMCPBroker.app import create_app +from pyMCPBroker.broker import Broker +from pyMCPBroker.config import load_config + + +def make_config(tmp_path: Path) -> Path: + server = Path(__file__).with_name("fake_mcp_server.py") + command = f"{shlex.quote(sys.executable)} {shlex.quote(str(server))}" + config = { + "backends": { + "gitea": { + "backend": "stdio", + "command": command, + "tool_filter": ["get_*", "!delete_*"], + "tool_overrides": { + "get_file_contents": { + "summary": "Read one file from a repository", + "max_output_chars": 1200, + "example_args": { + "owner": "myorg", + "repo": "demo-repo", + "ref": "main", + "filePath": "README.md", + }, + } + }, + } + }, + "tree": { + "path": "/", + "type": "node", + "summary": "Root", + "children": [ + { + "path": "/repo", + "type": "node", + "summary": "Repository operations", + "children": [ + { + "path": "/repo/read", + "type": "node", + "summary": "Read repository data", + "children": [ + { + "path": "/repo/read/get_file", + "type": "tool", + "summary": "Read one file", + "backend_ref": "gitea", + "tool_name": "get_file_contents", + } + ], + } + ], + } + ], + }, + } + path = tmp_path / "config.json" + path.write_text(json.dumps(config), encoding="utf-8") + return path + + +def test_meta_end_to_end(tmp_path: Path) -> None: + cfg = load_config(make_config(tmp_path)) + broker = Broker(cfg) + app = create_app(broker) + + with TestClient(app) as client: + r = client.post("/meta_tree", json={"path": "/"}) + assert r.status_code == 200 + assert r.json()["children"][0]["path"] == "/repo" + + r = client.post("/meta_desc", json={"path": "/repo/read/get_file"}) + body = r.json() + assert body["summary"] == "Read one file from a repository" + assert body["args_schema"]["required"] == ["owner", "repo", "ref", "filePath"] + assert body["example_args"]["repo"] == "demo-repo" + + r = client.post( + "/meta_call", + json={ + "path": "/repo/read/get_file", + "args": {"owner": "acme", "repo": "demo", "ref": "main", "filePath": "README.md"}, + }, + ) + body = r.json() + assert body["ok"] is True + assert body["result"]["truncated"] is True + assert body["result"]["result"]["structuredContent"]["repo"] == "demo" + + +def test_meta_call_invalid_args(tmp_path: Path) -> None: + cfg = load_config(make_config(tmp_path)) + broker = Broker(cfg) + app = create_app(broker) + + with TestClient(app) as client: + r = client.post("/meta_call", json={"path": "/repo/read/get_file", "args": {"owner": "acme"}}) + body = r.json() + assert r.status_code == 400 + assert body["error_code"] == "invalid_arguments" + assert "required property" in body["message"] + + +def test_meta_tree_rejects_tool_path(tmp_path: Path) -> None: + cfg = load_config(make_config(tmp_path)) + broker = Broker(cfg) + app = create_app(broker) + + with TestClient(app) as client: + r = client.post("/meta_tree", json={"path": "/repo/read/get_file"}) + assert r.status_code == 400 + assert r.json()["error_code"] == "not_a_node" + + +def test_secret_auth(tmp_path: Path) -> None: + cfg = load_config(make_config(tmp_path)) + broker = Broker(cfg) + app = create_app(broker, shared_secret="sekret") + + with TestClient(app) as client: + assert client.post("/meta_tree", json={"path": "/"}).status_code == 401 + assert client.post("/meta_tree", json={"path": "/"}, headers={"Authorization": "Bearer sekret"}).status_code == 200 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..67979a7 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from pyMCPBroker.config import load_config + + +def test_env_substitution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("BROKER_CMD", "python fake.py") + path = tmp_path / "config.json" + path.write_text(json.dumps({"backends": {"x": {"backend": "stdio", "command": "${BROKER_CMD}"}}, "tree": {"path": "/", "type": "node", "children": []}}), encoding="utf-8") + cfg = load_config(path) + assert cfg.backends["x"].command == "python fake.py" + + +def test_env_missing_raises(tmp_path: Path) -> None: + path = tmp_path / "config.json" + path.write_text(json.dumps({"backends": {"x": {"backend": "stdio", "command": "${MISSING_VAR}"}}, "tree": {"path": "/", "type": "node", "children": []}}), encoding="utf-8") + with pytest.raises(ValueError): + load_config(path) diff --git a/tests/test_filters.py b/tests/test_filters.py new file mode 100644 index 0000000..7ac4bb3 --- /dev/null +++ b/tests/test_filters.py @@ -0,0 +1,10 @@ +from pyMCPBroker.filters import filter_names, is_allowed + + +def test_filter_allow_then_deny() -> None: + patterns = ["get_*", "list_*", "!get_secret*"] + assert is_allowed("get_file", patterns) + assert is_allowed("list_repos", patterns) + assert not is_allowed("get_secret_token", patterns) + assert not is_allowed("delete_file", patterns) + assert filter_names(["get_file", "get_secret_token", "delete_file"], patterns) == ["get_file"]