Add regex JSONPath support and gen_apidoc command

**Enhanced JSONPath Support**:
- Manual regex implementation for queries like `$.tools[?(@.name =~ /pattern/i)]` (see the sketch after this list)
- Supports case-insensitive searches with /i flag
- Works on both tool names and descriptions
- Falls back to standard JSONPath for non-regex queries
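
As a rough, standalone sketch of the regex handling described above (the helper name and sample tools are illustrative; the actual logic lives in `ToolRegistry._regex_search` in the diff below):

```python
import re
from typing import Any, Dict, List, Optional

def regex_name_filter(jsonpath: str, tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
    """Sketch: pull /pattern/flags out of the query and filter tools by name."""
    match = re.search(r'/([^/]+)/([gi]*)', jsonpath)
    if not match:
        return None
    flags = re.IGNORECASE if 'i' in match.group(2) else 0
    regex = re.compile(match.group(1), flags)
    hits = [t for t in tools if regex.search(t.get("name", ""))]
    return hits or None

tools = [{"name": "memory_store"}, {"name": "Bash"}, {"name": "task_list"}]
print(regex_name_filter("$.tools[?(@.name =~ /memory|task/i)]", tools))
# [{'name': 'memory_store'}, {'name': 'task_list'}]
```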

**gen_apidoc Command**:
- New `python setup.py gen_apidoc` command
- Generates comprehensive JSON documentation of MCP API
- Includes all servers, tools, schemas, and capabilities
- AI-optimized format with discovery patterns
- Runtime server status information
- Perfect for adding to project knowledge bases (see the consumption sketch below)
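
A possible way to consume the generated file afterwards; the field names match what the command writes, though the exact contents depend on which servers are configured:

```python
import json

# Consume the file written by `python setup.py gen_apidoc`.
with open("mcp_api_documentation.json") as f:
    api_doc = json.load(f)

print(f"{api_doc['total_servers']} servers, {api_doc['total_tools']} tools")

# Per-server summary, mirroring the command's quick-reference output.
for name, info in api_doc["servers"].items():
    print(f"  {name}: {info['tool_count']} tools, capabilities={info['capabilities']}")

# The documented discovery patterns can be fed back into the JSONPath search.
for label, pattern in api_doc["discovery_patterns"].items():
    print(f"  {label}: {pattern}")
```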

**API Documentation Features**:
- Complete server inventory with tool counts
- Capability inference from tool names/descriptions (keyword rules sketched below)
- Discovery pattern examples for common queries
- Sparse mode information for context optimization
- Tool grouping by server with metadata
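
Roughly how the capability inference works, as a condensed name-only sketch (the real `_extract_capabilities` in the diff also checks descriptions, e.g. for `web_scraping`):

```python
# Keyword -> capability rules, condensed from ToolRegistry._extract_capabilities.
RULES = {
    "file_operations": ("read", "write", "file"),
    "search_operations": ("search", "query", "find"),
    "web_operations": ("web", "http", "url"),
    "version_control": ("git", "repo", "commit"),
    "data_storage": ("memory", "store", "save"),
    "command_execution": ("exec", "run", "command"),
}

def infer_capabilities(tool_names):
    caps = set()
    for name in tool_names:
        lowered = name.lower()
        for capability, keywords in RULES.items():
            if any(k in lowered for k in keywords):
                caps.add(capability)
    return sorted(caps)

print(infer_capabilities(["git_commit", "read_file", "web_search"]))
# ['file_operations', 'search_operations', 'version_control', 'web_operations']
```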

This enables powerful regex-based tool discovery and provides
comprehensive API documentation for AI consumption.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Claude4Ξlope 2025-06-28 13:04:59 +02:00
parent c55ffdb59b
commit 1fee302b4d
2 changed files with 308 additions and 2 deletions


@@ -6,6 +6,7 @@ and supports sparse mode for context optimization.
"""
import json
import re
from typing import Dict, Any, List, Optional, Union
from jsonpath_ng import parse as jsonpath_parse
from jsonpath_ng.exceptions import JsonPathParserError
@@ -56,6 +57,10 @@ class ToolRegistry:
            $.tools[?(@.name=='Bash')] - Get Bash tool details
            $.tools[*].inputSchema - Get all input schemas
        """
        # Check if this is a regex query and handle it specially
        if "=~" in jsonpath:
            return self._regex_search(jsonpath)

        try:
            expr = jsonpath_parse(jsonpath)
        except (JsonPathParserError, Exception):
@@ -79,6 +84,89 @@ class ToolRegistry:
        else:
            return [match.value for match in matches]

    def _regex_search(self, jsonpath: str) -> Union[List[Any], Any, None]:
        """
        Handle regex-based JSONPath queries manually.

        Supports patterns like: $.tools[?(@.name =~ /pattern/flags)]
        """
        # Parse basic regex patterns for tools
        if "$.tools[?(@.name =~" in jsonpath:
            # Extract regex pattern
            match = re.search(r'/([^/]+)/([gi]*)', jsonpath)
            if not match:
                return None

            pattern = match.group(1)
            flags_str = match.group(2)

            # Convert flags
            flags = 0
            if 'i' in flags_str:
                flags |= re.IGNORECASE
            if 'g' in flags_str:
                pass  # Global is default behavior in Python findall

            try:
                regex = re.compile(pattern, flags)
            except re.error:
                return None

            # Search through tools
            matches = []
            for tool in self.raw_tool_list:
                tool_name = tool.get("name", "")
                if regex.search(tool_name):
                    matches.append(tool)

            return matches if matches else None

        elif "$.tools[?(@.description =~" in jsonpath:
            # Extract regex pattern for descriptions
            match = re.search(r'/([^/]+)/([gi]*)', jsonpath)
            if not match:
                return None

            pattern = match.group(1)
            flags_str = match.group(2)

            flags = 0
            if 'i' in flags_str:
                flags |= re.IGNORECASE

            try:
                regex = re.compile(pattern, flags)
            except re.error:
                return None

            # Search through tool descriptions
            matches = []
            for tool in self.raw_tool_list:
                description = tool.get("description", "")
                if regex.search(description):
                    matches.append(tool)

            return matches if matches else None

        # Fallback to basic JSONPath if regex pattern not recognized
        try:
            expr = jsonpath_parse(jsonpath.replace("=~", "=="))  # Try basic equality
            search_data = {
                "tools": self.raw_tool_list,
                "tool_names": self.get_all_tool_names(),
                "metadata": self._metadata,
                "servers": self._metadata.get("servers", {})
            }
            matches = expr.find(search_data)

            if not matches:
                return None
            elif len(matches) == 1:
                return matches[0].value
            else:
                return [match.value for match in matches]
        except:
            return None

    def get_sparse_tools(self) -> List[Dict[str, Any]]:
        """
        Get minimal tool list for sparse mode.
@@ -148,8 +236,126 @@ class ToolRegistry:
        return sparse_tools

    def set_metadata(self, key: str, value: Any):
        """Set metadata that can be discovered via JSONPath."""

    def get_full_api_documentation(self) -> Dict[str, Any]:
        """
        Generate comprehensive API documentation for AI consumption.

        Returns complete server and tool information in structured JSON format.
        """
        servers = self._metadata.get("servers", {})

        # Group tools by server
        tools_by_server = {}
        builtin_tools = []

        for tool in self.raw_tool_list:
            tool_name = tool.get("name", "")

            # Check if it's a server-namespaced tool
            if "::" in tool_name:
                server_ns = tool_name.split("::")[0]
                if server_ns not in tools_by_server:
                    tools_by_server[server_ns] = []
                tools_by_server[server_ns].append(tool)
            else:
                # Check if tool belongs to a specific server based on metadata
                found_server = None
                for server_name, server_info in servers.items():
                    server_tools = server_info.get("tools", [])
                    if any(t.get("name") == tool_name for t in server_tools):
                        found_server = server_name
                        break

                if found_server:
                    if found_server not in tools_by_server:
                        tools_by_server[found_server] = []
                    tools_by_server[found_server].append(tool)
                else:
                    builtin_tools.append(tool)

        # Build comprehensive documentation
        api_doc = {
            "mcp_browser_version": "0.2.0",
            "total_servers": len(servers) + (1 if builtin_tools else 0),
            "total_tools": len(self.raw_tool_list),
            "generation_timestamp": None,  # Will be set when called
            "servers": {},
            "builtin": {
                "name": "builtin",
                "description": "Built-in MCP Browser servers (screen, memory, patterns, onboarding)",
                "status": "active",
                "tools": builtin_tools,
                "tool_count": len(builtin_tools),
                "capabilities": ["screen_management", "memory_storage", "pattern_matching", "onboarding_management"]
            },
            "discovery_patterns": {
                "all_tools": "$.tools[*]",
                "all_tool_names": "$.tools[*].name",
                "tools_by_server": "$.servers[*].tools[*]",
                "tool_schemas": "$.tools[*].inputSchema",
                "memory_tools": "$.tools[?(@.name =~ /memory|task|pattern|knowledge/i)]",
                "screen_tools": "$.tools[?(@.name =~ /screen|session/i)]",
                "find_tool_by_name": "$.tools[?(@.name=='TOOL_NAME')]",
                "server_capabilities": "$.servers[*].capabilities"
            },
            "sparse_mode_info": {
                "visible_tools": ["mcp_discover", "mcp_call", "onboarding"],
                "hidden_tools": len(self.raw_tool_list),
                "purpose": "Context optimization - full MCP API accessible via proxy tools"
            }
        }

        # Add external servers
        for server_name, server_info in servers.items():
            server_tools = tools_by_server.get(server_name, [])
            api_doc["servers"][server_name] = {
                "name": server_name,
                "description": server_info.get("description", ""),
                "command": server_info.get("command", []),
                "status": server_info.get("status", "unknown"),
                "tools": server_tools,
                "tool_count": len(server_tools),
                "tool_names": [t.get("name", "") for t in server_tools],
                "environment": server_info.get("env", {}),
                "working_directory": server_info.get("cwd"),
                "capabilities": self._extract_capabilities(server_tools)
            }

        return api_doc

    def _extract_capabilities(self, tools: List[Dict[str, Any]]) -> List[str]:
        """Extract capabilities from tool list."""
        capabilities = set()

        for tool in tools:
            name = tool.get("name", "").lower()
            desc = tool.get("description", "").lower()

            # Infer capabilities from tool names and descriptions
            if any(keyword in name for keyword in ["read", "write", "file"]):
                capabilities.add("file_operations")
            if any(keyword in name for keyword in ["search", "query", "find"]):
                capabilities.add("search_operations")
            if any(keyword in name for keyword in ["web", "http", "url"]):
                capabilities.add("web_operations")
            if any(keyword in name for keyword in ["git", "repo", "commit"]):
                capabilities.add("version_control")
            if any(keyword in name for keyword in ["memory", "store", "save"]):
                capabilities.add("data_storage")
            if any(keyword in name for keyword in ["exec", "run", "command"]):
                capabilities.add("command_execution")
            if any(keyword in desc for keyword in ["browser", "scrape", "crawl"]):
                capabilities.add("web_scraping")

        return sorted(list(capabilities))

    def set_metadata(self, metadata: Dict[str, Any]):
        """Set metadata about servers and configuration."""
        self._metadata = metadata

    def update_metadata(self, key: str, value: Any):
        """Set specific metadata that can be discovered via JSONPath."""
        self._metadata[key] = value

    def to_json(self) -> str:

setup.py

@@ -158,6 +158,105 @@ class TestCommand(Command):
        print("✅ All tests passed!")


class GenerateApiDocs(Command):
    """Generate comprehensive MCP API documentation for AI consumption."""

    description = 'Generate JSON API documentation of all MCP servers and tools'
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        """Generate comprehensive MCP API documentation."""
        print("Generating MCP API Documentation...")

        import asyncio
        from datetime import datetime

        async def generate_docs():
            # Import here to avoid circular dependencies
            from mcp_browser import MCPBrowser
            from pathlib import Path

            try:
                # Initialize MCP Browser with built-in servers
                browser = MCPBrowser(enable_builtin_servers=True)

                # Wait for initialization and force tool discovery
                await asyncio.sleep(3)

                # Try to trigger tool discovery
                if hasattr(browser, 'discover_tools'):
                    await browser.discover_tools()
                elif hasattr(browser, 'multi_server') and browser.multi_server:
                    # Force refresh of tools from built-in servers
                    await browser.multi_server.refresh_tools()

                # Get comprehensive API documentation
                api_doc = browser.registry.get_full_api_documentation()
                api_doc["generation_timestamp"] = datetime.now().isoformat()

                # Add runtime server information
                if hasattr(browser, 'multi_server') and browser.multi_server:
                    server_status = {}
                    for name, server in browser.multi_server.servers.items():
                        server_status[name] = {
                            "status": "active" if server.is_running() else "inactive",
                            "pid": getattr(server, 'process', {}).get('pid') if hasattr(server, 'process') else None
                        }
                    api_doc["runtime_status"] = server_status

                # Write to file
                output_file = Path("mcp_api_documentation.json")
                with open(output_file, 'w') as f:
                    import json
                    json.dump(api_doc, f, indent=2)

                print(f"✓ Generated comprehensive MCP API documentation")
                print(f"✓ Output: {output_file.absolute()}")
                print(f"✓ Total servers: {api_doc['total_servers']}")
                print(f"✓ Total tools: {api_doc['total_tools']}")

                # Also print summary for immediate use
                print("\n" + "=" * 60)
                print("QUICK REFERENCE:")
                print("=" * 60)

                builtin = api_doc.get("builtin", {})
                print(f"Built-in tools ({builtin.get('tool_count', 0)}):")
                for tool in builtin.get("tools", [])[:5]:  # Show first 5
                    print(f" - {tool.get('name', 'Unknown')}")
                if builtin.get('tool_count', 0) > 5:
                    print(f" ... and {builtin.get('tool_count', 0) - 5} more")

                print(f"\nExternal servers ({len(api_doc.get('servers', {}))}):")
                for server_name, server_info in api_doc.get("servers", {}).items():
                    print(f" - {server_name}: {server_info.get('tool_count', 0)} tools")

                print(f"\nDiscovery patterns available in documentation:")
                for pattern_name, pattern in api_doc.get("discovery_patterns", {}).items():
                    print(f" - {pattern_name}: {pattern}")

                # Clean up if method exists
                if hasattr(browser, 'cleanup'):
                    await browser.cleanup()
                elif hasattr(browser, 'close'):
                    await browser.close()
                elif hasattr(browser, 'shutdown'):
                    await browser.shutdown()

            except Exception as e:
                print(f"✗ Failed to generate API documentation: {e}")
                import traceback
                traceback.print_exc()
                sys.exit(1)

        asyncio.run(generate_docs())


# Read long description
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()
@@ -213,6 +312,7 @@ setup(
    cmdclass={
        'aidocs': GenerateAIDocs,
        'test': TestCommand,
        'gen_apidoc': GenerateApiDocs,
    },
    license="GPL-3.0-or-later",
    classifiers=[