Commit 1773958

Implement basic rate-limiting based on session ID

Fixes: #22
Signed-off-by: Nimisha Mehta <nimishamehta5@gmail.com>

1 parent 057dd99 commit 1773958

5 files changed: 289 additions, 5 deletions

README.md

Lines changed: 20 additions & 0 deletions
@@ -15,6 +15,7 @@ MKP is a Model Context Protocol (MCP) server for Kubernetes that allows LLM-powe
 - Apply (create or update) clustered resources
 - Apply (create or update) namespaced resources
 - Generic and pluggable implementation using API Machinery's unstructured client
+- Built-in rate limiting for protection against excessive API calls
 
 ## Why MKP?
 
@@ -47,6 +48,7 @@ MKP offers several key advantages as a Model Context Protocol server for Kuberne
 ### Production-Ready Architecture
 - Designed for reliability and performance in production environments
 - Proper error handling and resource management
+- Built-in rate limiting to protect against excessive API calls
 - Testable design with comprehensive unit tests
 - Follows Kubernetes development best practices
 
@@ -291,6 +293,24 @@ By default, MKP operates in read-only mode, meaning it does not allow write oper
 ./build/mkp-server --kubeconfig=/path/to/kubeconfig --read-write=true
 ```
 
+### Rate Limiting
+
+MKP includes a built-in rate limiting mechanism to protect the server from excessive API calls, which is particularly important when used with AI agents. The rate limiter uses a token bucket algorithm and applies different limits based on the operation type:
+
+- Read operations (list_resources, get_resource): 120 requests per minute
+- Write operations (apply_resource, delete_resource): 30 requests per minute
+- Default for other operations: 60 requests per minute
+
+Rate limits are applied per client session, ensuring fair resource allocation across multiple clients. The rate limiting feature can be enabled or disabled via the server configuration.
+
+```bash
+# Run with rate limiting enabled (default)
+task run
+
+# Run with rate limiting disabled
+DISABLE_RATE_LIMITING=true task run
+```
+
 ## Development
 
 ### Running tests
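The DISABLE_RATE_LIMITING switch documented above presumably maps onto the new EnableRateLimiting field of the server configuration (see pkg/mcp/server.go below). A minimal sketch of that wiring, assuming the exported mcp.DefaultConfig and mcp.CreateServer from this commit; the buildServer helper and the exact environment-variable handling are illustrative, not part of the diff:

```go
package main

import (
	"os"

	"github.com/mark3labs/mcp-go/server"

	"github.com/StacklokLabs/mkp/pkg/k8s"
	"github.com/StacklokLabs/mkp/pkg/mcp"
)

// buildServer disables rate limiting when DISABLE_RATE_LIMITING=true and
// otherwise keeps the defaults (rate limiting enabled, read-only mode).
func buildServer(k8sClient *k8s.Client) *server.MCPServer {
	cfg := mcp.DefaultConfig()
	if os.Getenv("DISABLE_RATE_LIMITING") == "true" {
		cfg.EnableRateLimiting = false
	}
	return mcp.CreateServer(k8sClient, cfg)
}
```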

cmd/server/main.go

Lines changed: 7 additions & 0 deletions
@@ -95,10 +95,17 @@ func main() {
 	shutdownCh := make(chan error, 1)
 	go func() {
 		log.Println("Initiating server shutdown...")
+
+		// Stop the SSE server
 		err := sseServer.Shutdown(shutdownCtx)
 		if err != nil {
 			log.Printf("Error during shutdown: %v", err)
 		}
+
+		// Stop the MCP server resources (including rate limiter)
+		log.Println("Stopping MCP server resources...")
+		mcp.StopServer()
+
 		shutdownCh <- err
 		close(shutdownCh)
 	}()

pkg/mcp/server.go

Lines changed: 47 additions & 5 deletions
@@ -7,6 +7,7 @@ import (
 	"github.com/mark3labs/mcp-go/server"
 
 	"github.com/StacklokLabs/mkp/pkg/k8s"
+	"github.com/StacklokLabs/mkp/pkg/ratelimit"
 )
 
 // Config holds configuration options for the MCP server
@@ -18,16 +19,29 @@ type Config struct {
 	// ReadWrite determines whether the MCP server can modify resources in the cluster
 	// When false, the server operates in read-only mode and does not serve the apply_resource tool
 	ReadWrite bool
+
+	// EnableRateLimiting determines whether to enable rate limiting for tool calls
+	// When true, a default rate limiter will be used to prevent excessive API calls
+	EnableRateLimiting bool
 }
 
 // DefaultConfig returns a Config with default values
 func DefaultConfig() *Config {
 	return &Config{
-		ServeResources: true,  // Default to serving resources for backward compatibility
-		ReadWrite:      false, // Default to read-only mode
+		ServeResources:     true,  // Default to serving resources for backward compatibility
+		ReadWrite:          false, // Default to read-only mode
+		EnableRateLimiting: true,  // Default to enabling rate limiting
 	}
 }
 
+// serverResources holds resources that need to be cleaned up when the server is stopped
+type serverResources struct {
+	rateLimiter *ratelimit.RateLimiter
+}
+
+// Global variable to hold server resources
+var resources *serverResources
+
 // CreateServer creates a new MCP server for Kubernetes
 func CreateServer(k8sClient *k8s.Client, config *Config) *server.MCPServer {
 	// Use default config if none provided
@@ -37,12 +51,29 @@ func CreateServer(k8sClient *k8s.Client, config *Config) *server.MCPServer {
 	// Create MCP implementation
 	impl := NewImplementation(k8sClient)
 
-	// Create MCP server
+	options := []server.ServerOption{
+		server.WithResourceCapabilities(true, true),
+		server.WithToolCapabilities(true),
+	}
+
+	// Add rate limiting middleware if enabled
+	if config.EnableRateLimiting {
+		// Create and store the rate limiter for later cleanup
+		limiter := ratelimit.GetDefaultRateLimiter()
+
+		// Store the limiter for cleanup when the server is stopped
+		resources = &serverResources{
+			rateLimiter: limiter,
+		}
+
+		options = append(options, server.WithToolHandlerMiddleware(limiter.Middleware()))
+	}
+
+	// Create MCP server with all options
 	mcpServer := server.NewMCPServer(
 		"kubernetes-mcp-server",
 		"0.1.0",
-		server.WithResourceCapabilities(true, true),
-		server.WithToolCapabilities(true),
+		options...,
 	)
 
 	// Add tools
@@ -84,6 +115,17 @@ func CreateServer(k8sClient *k8s.Client, config *Config) *server.MCPServer {
 	return mcpServer
 }
 
+// StopServer stops the MCP server and cleans up resources
+func StopServer() {
+	// Clean up resources
+	if resources != nil {
+		// Stop the rate limiter if it exists
+		if resources.rateLimiter != nil {
+			resources.rateLimiter.Stop()
+		}
+	}
+}
+
 // CreateSSEServer creates a new SSE server for the MCP server
 func CreateSSEServer(mcpServer *server.MCPServer) *server.SSEServer {
 	return server.NewSSEServer(mcpServer)
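Because the limiter is attached through standard mcp-go tool-handler middleware, the same pattern works for any MCPServer, not just the one built by CreateServer above. A short sketch under that assumption; the newLimitedServer helper and its name/version strings are illustrative:

```go
package main

import (
	"github.com/mark3labs/mcp-go/server"

	"github.com/StacklokLabs/mkp/pkg/ratelimit"
)

// newLimitedServer attaches the default rate limiter to a freshly built MCP
// server and returns both, so the caller can Stop() the limiter on shutdown.
func newLimitedServer() (*server.MCPServer, *ratelimit.RateLimiter) {
	limiter := ratelimit.GetDefaultRateLimiter()
	s := server.NewMCPServer(
		"example-server",
		"0.0.1",
		server.WithToolCapabilities(true),
		server.WithToolHandlerMiddleware(limiter.Middleware()),
	)
	return s, limiter
}
```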

pkg/ratelimit/config.go

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
package ratelimit

const defaultLimit = 60

// DefaultConfig defines the default rate limits for different tools
var DefaultConfig = map[string]int{
	// Read operations - higher limits
	"list_resources": 120, // 120 requests per minute (2 per second)
	"get_resource":   120, // 120 requests per minute (2 per second)
	"read_resource":  120, // 120 requests per minute (2 per second)

	// Write operations - lower limits
	"apply_resource":  30, // 30 requests per minute (0.5 per second)
	"delete_resource": 30, // 30 requests per minute (0.5 per second)

	// Default for any other tool
	"default": defaultLimit,
}

// GetDefaultRateLimiter returns a RateLimiter with default configuration
func GetDefaultRateLimiter() *RateLimiter {
	options := []RateLimiterOption{
		WithDefaultLimit(DefaultConfig["default"]),
	}

	// Add tool-specific limits
	for tool, limit := range DefaultConfig {
		if tool != "default" {
			options = append(options, WithToolLimit(tool, limit))
		}
	}

	return NewRateLimiter(options...)
}
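The functional options used by GetDefaultRateLimiter also let callers build a limiter with different numbers than DefaultConfig. A small sketch; the newStrictLimiter name and the specific limits are illustrative, not taken from this commit:

```go
package main

import "github.com/StacklokLabs/mkp/pkg/ratelimit"

// newStrictLimiter builds a limiter that is tighter than the defaults:
// unlisted tools get 30 requests per minute, reads 60, writes 10.
func newStrictLimiter() *ratelimit.RateLimiter {
	return ratelimit.NewRateLimiter(
		ratelimit.WithDefaultLimit(30),
		ratelimit.WithToolLimit("list_resources", 60),
		ratelimit.WithToolLimit("get_resource", 60),
		ratelimit.WithToolLimit("apply_resource", 10),
	)
}
```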

pkg/ratelimit/ratelimit.go

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
package ratelimit

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/mark3labs/mcp-go/mcp"
	"github.com/mark3labs/mcp-go/server"
)

const (
	cleanupInterval = 10 * time.Minute
	bucketTimeout   = 30 * time.Minute
)

// RateLimiter implements a rate limiting middleware for MCP server
// It uses a token bucket algorithm to limit the number of requests per minute for each tool
type RateLimiter struct {
	mu            sync.RWMutex
	limits        map[string]int                // Tool name to requests per minute
	defaultLimit  int                           // Default requests per minute
	buckets       map[string]map[string]*bucket // SessionID:[Tool:Bucket] mapping
	cleanupTicker *time.Ticker
}

// bucket represents a token bucket for rate limiting
type bucket struct {
	mu       sync.Mutex
	tokens   int       // Current number of tokens
	lastSeen time.Time // Last time this bucket was accessed
}

// RateLimiterOption is a function that configures a RateLimiter
type RateLimiterOption func(*RateLimiter)

// WithToolLimit sets the rate limit for a specific tool
func WithToolLimit(toolName string, requestsPerMinute int) RateLimiterOption {
	return func(rl *RateLimiter) {
		rl.limits[toolName] = requestsPerMinute
	}
}

// WithDefaultLimit sets the default rate limit for all tools
func WithDefaultLimit(requestsPerMinute int) RateLimiterOption {
	return func(rl *RateLimiter) {
		rl.defaultLimit = requestsPerMinute
	}
}

// NewRateLimiter creates a new rate limiter with the given options
func NewRateLimiter(opts ...RateLimiterOption) *RateLimiter {
	rl := &RateLimiter{
		limits:       make(map[string]int),
		defaultLimit: defaultLimit,
		buckets:      make(map[string]map[string]*bucket),
	}

	for _, opt := range opts {
		opt(rl)
	}

	// Start a cleanup ticker to remove old buckets
	rl.cleanupTicker = time.NewTicker(cleanupInterval)
	go func() {
		for range rl.cleanupTicker.C {
			rl.cleanup()
		}
	}()

	return rl
}

func (rl *RateLimiter) cleanup() {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	now := time.Now()
	for sessionID, toolBuckets := range rl.buckets {
		for tool, b := range toolBuckets {
			b.mu.Lock()
			// If bucket hasn't been used for bucketTimeout, remove it
			if now.Sub(b.lastSeen) > bucketTimeout {
				delete(toolBuckets, tool)
			}
			b.mu.Unlock()
		}
		// If no more buckets for this session, remove the session entry
		if len(toolBuckets) == 0 {
			delete(rl.buckets, sessionID)
		}
	}
}

// Stop stops the cleanup ticker
func (rl *RateLimiter) Stop() {
	if rl.cleanupTicker != nil {
		rl.cleanupTicker.Stop()
	}
}

// getSessionID extracts the session ID from the request context
func getSessionID(ctx context.Context) string {
	// Get the session from the context
	if session := server.ClientSessionFromContext(ctx); session != nil {
		return session.SessionID()
	}
	// If no session is available (which shouldn't happen in normal operation),
	// return a default identifier
	return "unknown"
}

// getBucket gets or creates a bucket for the given session ID and tool
func (rl *RateLimiter) getBucket(sessionID, tool string) *bucket {
	// Look up the limit before taking the write lock: getLimit acquires a read
	// lock on the same mutex, and sync.RWMutex is not reentrant.
	limit := rl.getLimit(tool)

	rl.mu.Lock()
	defer rl.mu.Unlock()

	// Create session map if it doesn't exist
	if _, ok := rl.buckets[sessionID]; !ok {
		rl.buckets[sessionID] = make(map[string]*bucket)
	}

	// Create bucket if it doesn't exist
	if _, ok := rl.buckets[sessionID][tool]; !ok {
		rl.buckets[sessionID][tool] = &bucket{
			tokens:   limit, // Initialize with full tokens
			lastSeen: time.Now(),
		}
	}

	return rl.buckets[sessionID][tool]
}

// getLimit returns the rate limit for the given tool
func (rl *RateLimiter) getLimit(tool string) int {
	rl.mu.RLock()
	defer rl.mu.RUnlock()

	if limit, ok := rl.limits[tool]; ok {
		return limit
	}
	return rl.defaultLimit
}

// Middleware returns a middleware function for the MCP server
func (rl *RateLimiter) Middleware() server.ToolHandlerMiddleware {
	return func(next server.ToolHandlerFunc) server.ToolHandlerFunc {
		return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
			sessionID := getSessionID(ctx)
			tool := request.Params.Name

			// Resolve the bucket and limit before locking the bucket, so rl.mu
			// and b.mu are never held at the same time.
			b := rl.getBucket(sessionID, tool)
			limit := rl.getLimit(tool)

			b.mu.Lock()
			defer b.mu.Unlock()

			// Calculate tokens to add based on time elapsed since the last
			// request, then record this access
			now := time.Now()
			tokensPerSecond := float64(limit) / 60.0
			elapsed := now.Sub(b.lastSeen).Seconds()
			tokensToAdd := int(elapsed * tokensPerSecond)
			b.lastSeen = now

			// Add tokens, but don't exceed the limit
			b.tokens = min(b.tokens+tokensToAdd, limit)

			// Check if we have enough tokens
			if b.tokens <= 0 {
				return mcp.NewToolResultError(fmt.Sprintf("Rate limit exceeded for tool '%s'. Try again later.", tool)), nil
			}

			// Consume a token
			b.tokens--

			// Call the next handler
			return next(ctx, request)
		}
	}
}
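To make the refill arithmetic in Middleware concrete, a small self-contained sketch with illustrative numbers: at 120 requests per minute the bucket refills at 2 tokens per second, so 1.5 seconds of idle time adds 3 tokens, and the bucket never holds more than the limit.

```go
package main

import "fmt"

func main() {
	// Refill math from the middleware: limit/60 tokens per second.
	limit := 120
	tokensPerSecond := float64(limit) / 60.0 // 2 tokens per second
	elapsed := 1.5                           // seconds since the bucket was last seen
	tokensToAdd := int(elapsed * tokensPerSecond)

	tokens := 119                           // tokens left before the refill
	tokens = min(tokens+tokensToAdd, limit) // capped at the limit

	fmt.Println(tokensToAdd, tokens) // 3 120
}
```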
