Add regex JSONPath support and gen_apidoc command

**Enhanced JSONPath Support**:
- Manual regex implementation for queries like `$.tools[?(@.name =~ /pattern/i)]`
- Supports case-insensitive searches with /i flag
- Works on both tool names and descriptions
- Fallback to standard JSONPath for non-regex queries

**gen_apidoc Command**:
- New `python setup.py gen_apidoc` command
- Generates comprehensive JSON documentation of MCP API
- Includes all servers, tools, schemas, and capabilities
- AI-optimized format with discovery patterns
- Runtime server status information
- Perfect for adding to project knowledge bases

**API Documentation Features**:
- Complete server inventory with tool counts
- Capability inference from tool names/descriptions
- Discovery pattern examples for common queries
- Sparse mode information for context optimization
- Tool grouping by server with metadata

This enables powerful regex-based tool discovery and provides
comprehensive API documentation for AI consumption.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude4Ξlope 2025-06-28 13:04:59 +02:00
parent c55ffdb59b
commit 1fee302b4d
2 changed files with 308 additions and 2 deletions

View File

@ -6,6 +6,7 @@ and supports sparse mode for context optimization.
""" """
import json import json
import re
from typing import Dict, Any, List, Optional, Union from typing import Dict, Any, List, Optional, Union
from jsonpath_ng import parse as jsonpath_parse from jsonpath_ng import parse as jsonpath_parse
from jsonpath_ng.exceptions import JsonPathParserError from jsonpath_ng.exceptions import JsonPathParserError
@ -56,6 +57,10 @@ class ToolRegistry:
$.tools[?(@.name=='Bash')] - Get Bash tool details $.tools[?(@.name=='Bash')] - Get Bash tool details
$.tools[*].inputSchema - Get all input schemas $.tools[*].inputSchema - Get all input schemas
""" """
# Check if this is a regex query and handle it specially
if "=~" in jsonpath:
return self._regex_search(jsonpath)
try: try:
expr = jsonpath_parse(jsonpath) expr = jsonpath_parse(jsonpath)
except (JsonPathParserError, Exception): except (JsonPathParserError, Exception):
@ -79,6 +84,89 @@ class ToolRegistry:
else: else:
return [match.value for match in matches] return [match.value for match in matches]
def _regex_search(self, jsonpath: str) -> Union[List[Any], Any, None]:
"""
Handle regex-based JSONPath queries manually.
Supports patterns like: $.tools[?(@.name =~ /pattern/flags)]
"""
# Parse basic regex patterns for tools
if "$.tools[?(@.name =~" in jsonpath:
# Extract regex pattern
match = re.search(r'/([^/]+)/([gi]*)', jsonpath)
if not match:
return None
pattern = match.group(1)
flags_str = match.group(2)
# Convert flags
flags = 0
if 'i' in flags_str:
flags |= re.IGNORECASE
if 'g' in flags_str:
pass # Global is default behavior in Python findall
try:
regex = re.compile(pattern, flags)
except re.error:
return None
# Search through tools
matches = []
for tool in self.raw_tool_list:
tool_name = tool.get("name", "")
if regex.search(tool_name):
matches.append(tool)
return matches if matches else None
elif "$.tools[?(@.description =~" in jsonpath:
# Extract regex pattern for descriptions
match = re.search(r'/([^/]+)/([gi]*)', jsonpath)
if not match:
return None
pattern = match.group(1)
flags_str = match.group(2)
flags = 0
if 'i' in flags_str:
flags |= re.IGNORECASE
try:
regex = re.compile(pattern, flags)
except re.error:
return None
# Search through tool descriptions
matches = []
for tool in self.raw_tool_list:
description = tool.get("description", "")
if regex.search(description):
matches.append(tool)
return matches if matches else None
# Fallback to basic JSONPath if regex pattern not recognized
try:
expr = jsonpath_parse(jsonpath.replace("=~", "==")) # Try basic equality
search_data = {
"tools": self.raw_tool_list,
"tool_names": self.get_all_tool_names(),
"metadata": self._metadata,
"servers": self._metadata.get("servers", {})
}
matches = expr.find(search_data)
if not matches:
return None
elif len(matches) == 1:
return matches[0].value
else:
return [match.value for match in matches]
except:
return None
def get_sparse_tools(self) -> List[Dict[str, Any]]: def get_sparse_tools(self) -> List[Dict[str, Any]]:
""" """
Get minimal tool list for sparse mode. Get minimal tool list for sparse mode.
@ -148,8 +236,126 @@ class ToolRegistry:
return sparse_tools return sparse_tools
def get_full_api_documentation(self) -> Dict[str, Any]:
    """
    Generate comprehensive API documentation for AI consumption.

    Tools are assigned to a server in this order:
      1. a ``server::tool`` name belongs to the server named by its prefix;
      2. otherwise the first server in ``self._metadata["servers"]`` whose
         ``tools`` list contains the tool name;
      3. otherwise the tool is reported under ``builtin``.

    Servers that appear only as a ``server::`` prefix (no metadata entry)
    are still listed, with empty/"unknown" descriptive fields, so every
    tool counted in ``total_tools`` is reachable in the output.

    Returns:
        A dict with version, counts, a ``generation_timestamp`` placeholder
        (None; the caller is expected to fill it in), per-server entries,
        the ``builtin`` entry, example discovery patterns and sparse-mode
        information.
    """
    servers = self._metadata.get("servers", {})

    # Index metadata-declared tool names once instead of rescanning every
    # server for every tool. setdefault keeps the first declaring server.
    owner_by_tool = {}
    for server_name, server_info in servers.items():
        for entry in server_info.get("tools", []):
            # Entries are normally dicts with a "name"; plain strings are
            # accepted too rather than crashing on .get().
            entry_name = entry.get("name") if isinstance(entry, dict) else entry
            if isinstance(entry_name, str):
                owner_by_tool.setdefault(entry_name, server_name)

    # Group tools by server
    tools_by_server = {}
    builtin_tools = []
    for tool in self.raw_tool_list:
        tool_name = tool.get("name", "")
        if "::" in tool_name:
            # Server-namespaced tool: the prefix names the owner.
            tools_by_server.setdefault(tool_name.split("::")[0], []).append(tool)
            continue
        owner = owner_by_tool.get(tool_name)
        if owner:
            tools_by_server.setdefault(owner, []).append(tool)
        else:
            builtin_tools.append(tool)

    # Metadata servers first (original order), then servers known only
    # from a namespace prefix, in order of first appearance.
    server_names = list(servers)
    server_names.extend(name for name in tools_by_server if name not in servers)

    # Build comprehensive documentation
    api_doc = {
        "mcp_browser_version": "0.2.0",
        "total_servers": len(server_names) + (1 if builtin_tools else 0),
        "total_tools": len(self.raw_tool_list),
        "generation_timestamp": None,  # Will be set when called
        "servers": {},
        "builtin": {
            "name": "builtin",
            "description": "Built-in MCP Browser servers (screen, memory, patterns, onboarding)",
            "status": "active",
            "tools": builtin_tools,
            "tool_count": len(builtin_tools),
            "capabilities": ["screen_management", "memory_storage", "pattern_matching", "onboarding_management"]
        },
        "discovery_patterns": {
            "all_tools": "$.tools[*]",
            "all_tool_names": "$.tools[*].name",
            "tools_by_server": "$.servers[*].tools[*]",
            "tool_schemas": "$.tools[*].inputSchema",
            "memory_tools": "$.tools[?(@.name =~ /memory|task|pattern|knowledge/i)]",
            "screen_tools": "$.tools[?(@.name =~ /screen|session/i)]",
            "find_tool_by_name": "$.tools[?(@.name=='TOOL_NAME')]",
            "server_capabilities": "$.servers[*].capabilities"
        },
        "sparse_mode_info": {
            "visible_tools": ["mcp_discover", "mcp_call", "onboarding"],
            "hidden_tools": len(self.raw_tool_list),
            "purpose": "Context optimization - full MCP API accessible via proxy tools"
        }
    }

    # Add external servers
    for server_name in server_names:
        server_info = servers.get(server_name, {})
        server_tools = tools_by_server.get(server_name, [])
        api_doc["servers"][server_name] = {
            "name": server_name,
            "description": server_info.get("description", ""),
            "command": server_info.get("command", []),
            "status": server_info.get("status", "unknown"),
            "tools": server_tools,
            "tool_count": len(server_tools),
            "tool_names": [t.get("name", "") for t in server_tools],
            # NOTE(review): "env" is copied verbatim into the generated
            # document; if server configs carry secrets (API keys, tokens)
            # they will be exported too - confirm this is intended.
            "environment": server_info.get("env", {}),
            "working_directory": server_info.get("cwd"),
            "capabilities": self._extract_capabilities(server_tools)
        }

    return api_doc
def _extract_capabilities(self, tools: List[Dict[str, Any]]) -> List[str]:
"""Extract capabilities from tool list."""
capabilities = set()
for tool in tools:
name = tool.get("name", "").lower()
desc = tool.get("description", "").lower()
# Infer capabilities from tool names and descriptions
if any(keyword in name for keyword in ["read", "write", "file"]):
capabilities.add("file_operations")
if any(keyword in name for keyword in ["search", "query", "find"]):
capabilities.add("search_operations")
if any(keyword in name for keyword in ["web", "http", "url"]):
capabilities.add("web_operations")
if any(keyword in name for keyword in ["git", "repo", "commit"]):
capabilities.add("version_control")
if any(keyword in name for keyword in ["memory", "store", "save"]):
capabilities.add("data_storage")
if any(keyword in name for keyword in ["exec", "run", "command"]):
capabilities.add("command_execution")
if any(keyword in desc for keyword in ["browser", "scrape", "crawl"]):
capabilities.add("web_scraping")
return sorted(list(capabilities))
def set_metadata(self, metadata: Dict[str, Any]):
    """
    Replace the registry's metadata (server and configuration info).

    The mapping is stored by reference, not copied. None is normalised to
    an empty dict so later ``self._metadata.get(...)`` / item-assignment
    calls keep working.

    Args:
        metadata: The new metadata mapping, or None to clear it.
    """
    self._metadata = metadata if metadata is not None else {}
def update_metadata(self, key: str, value: Any):
    """Store ``value`` under ``key`` in the metadata so JSONPath discovery can reach it."""
    self._metadata[key] = value
def to_json(self) -> str: def to_json(self) -> str:

100
setup.py
View File

@ -158,6 +158,105 @@ class TestCommand(Command):
print("✅ All tests passed!") print("✅ All tests passed!")
class GenerateApiDocs(Command):
    """Generate comprehensive MCP API documentation for AI consumption.

    Registered as a setup.py command. It starts an ``MCPBrowser`` with the
    built-in servers enabled, asks its registry for the full API
    documentation, adds a timestamp and per-server runtime status, writes
    the result to ``mcp_api_documentation.json`` in the current directory
    and prints a short summary. The process exits with status 1 if
    generation fails.
    """

    description = 'Generate JSON API documentation of all MCP servers and tools'
    user_options = []

    def initialize_options(self):
        """No options to initialise (the command defines no user options)."""
        pass

    def finalize_options(self):
        """No options to validate (the command defines no user options)."""
        pass

    @staticmethod
    def _process_pid(server):
        """Return the pid of ``server.process``, or None when unavailable.

        Accepts both a dict-like process record (``{"pid": ...}``) and an
        object exposing a ``pid`` attribute; a missing or None process
        yields None instead of raising.
        """
        process = getattr(server, 'process', None)
        if process is None:
            return None
        if isinstance(process, dict):
            return process.get('pid')
        return getattr(process, 'pid', None)

    @staticmethod
    async def _close_browser(browser):
        """Await the first available of cleanup() / close() / shutdown()."""
        for method_name in ('cleanup', 'close', 'shutdown'):
            if hasattr(browser, method_name):
                await getattr(browser, method_name)()
                return

    @staticmethod
    def _print_summary(api_doc, output_file):
        """Print where the file was written plus a quick reference."""
        print("✓ Generated comprehensive MCP API documentation")
        print(f"✓ Output: {output_file.absolute()}")
        print(f"✓ Total servers: {api_doc['total_servers']}")
        print(f"✓ Total tools: {api_doc['total_tools']}")

        # Also print summary for immediate use
        print("\n" + "=" * 60)
        print("QUICK REFERENCE:")
        print("=" * 60)

        builtin = api_doc.get("builtin", {})
        builtin_count = builtin.get('tool_count', 0)
        print(f"Built-in tools ({builtin_count}):")
        for tool in builtin.get("tools", [])[:5]:  # Show first 5
            print(f" - {tool.get('name', 'Unknown')}")
        if builtin_count > 5:
            print(f" ... and {builtin_count - 5} more")

        print(f"\nExternal servers ({len(api_doc.get('servers', {}))}):")
        for server_name, server_info in api_doc.get("servers", {}).items():
            print(f" - {server_name}: {server_info.get('tool_count', 0)} tools")

        print("\nDiscovery patterns available in documentation:")
        for pattern_name, pattern in api_doc.get("discovery_patterns", {}).items():
            print(f" - {pattern_name}: {pattern}")

    def run(self):
        """Generate comprehensive MCP API documentation."""
        print("Generating MCP API Documentation...")

        # Imported lazily so that merely loading setup.py stays cheap.
        import asyncio
        import json
        import sys
        import traceback
        from datetime import datetime
        from pathlib import Path

        async def generate_docs():
            """Build and write the documentation; return True on success."""
            # Import here to avoid circular dependencies
            from mcp_browser import MCPBrowser

            try:
                # Initialize MCP Browser with built-in servers
                browser = MCPBrowser(enable_builtin_servers=True)
                try:
                    # Wait for initialization and force tool discovery
                    await asyncio.sleep(3)

                    # Try to trigger tool discovery
                    multi_server = getattr(browser, 'multi_server', None)
                    if hasattr(browser, 'discover_tools'):
                        await browser.discover_tools()
                    elif multi_server:
                        # Force refresh of tools from built-in servers
                        await multi_server.refresh_tools()

                    # Get comprehensive API documentation
                    api_doc = browser.registry.get_full_api_documentation()
                    api_doc["generation_timestamp"] = datetime.now().isoformat()

                    # Add runtime server information
                    if multi_server:
                        api_doc["runtime_status"] = {
                            name: {
                                "status": "active" if server.is_running() else "inactive",
                                "pid": self._process_pid(server),
                            }
                            for name, server in multi_server.servers.items()
                        }

                    # Write to file
                    output_file = Path("mcp_api_documentation.json")
                    with open(output_file, 'w', encoding='utf-8') as f:
                        json.dump(api_doc, f, indent=2)

                    self._print_summary(api_doc, output_file)
                finally:
                    # Always release the browser, even when generation
                    # failed part-way, so nothing it started is left behind.
                    await self._close_browser(browser)
            except Exception as e:
                print(f"✗ Failed to generate API documentation: {e}")
                traceback.print_exc()
                return False
            return True

        # Exit outside the coroutine so the event loop shuts down normally
        # before the process status is set.
        if not asyncio.run(generate_docs()):
            sys.exit(1)
# Read long description # Read long description
with open("README.md", "r", encoding="utf-8") as fh: with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read() long_description = fh.read()
@ -213,6 +312,7 @@ setup(
cmdclass={ cmdclass={
'aidocs': GenerateAIDocs, 'aidocs': GenerateAIDocs,
'test': TestCommand, 'test': TestCommand,
'gen_apidoc': GenerateApiDocs,
}, },
license="GPL-3.0-or-later", license="GPL-3.0-or-later",
classifiers=[ classifiers=[