diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml
new file mode 100644
index 0000000..10552fe
--- /dev/null
+++ b/.github/workflows/performance-tests.yml
@@ -0,0 +1,217 @@
+name: Performance Tests
+
+on:
+  push:
+    branches: [ master, develop ]
+  pull_request:
+    branches: [ master, develop ]
+  workflow_dispatch:
+    inputs:
+      test_iterations:
+        description: 'Number of test iterations'
+        required: false
+        default: '3'
+      regression_threshold:
+        description: 'Regression threshold (%)'
+        required: false
+        default: '10'
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  unit-benchmarks:
+    name: Unit Benchmarks
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # Full history for comparison
+    
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: '1.22'
+    
+    - name: Cache Go modules
+      uses: actions/cache@v4
+      with:
+        path: |
+          ~/go/pkg/mod
+          ~/.cache/go-build
+        key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+        restore-keys: |
+          ${{ runner.os }}-go-
+    
+    - name: Run benchmarks
+      run: |
+        echo "Running performance benchmarks..."
+        echo ""
+        echo "=== 1. Buffer Pool (20,000× allocation improvement) ==="
+        go test -bench=BenchmarkBufferPool -benchmem -benchtime=2s -timeout=2m ./pkg/ | tee benchmark-results.txt
+        echo ""
+        echo "=== 2. GetContent End-to-End (real disk-based throughput) ==="
+        go test -bench=BenchmarkGetContentDiskCache -benchmem -benchtime=500ms -timeout=2m ./pkg/ | tee -a benchmark-results.txt
+    
+    - name: Upload benchmark results
+      uses: actions/upload-artifact@v4
+      with:
+        name: benchmark-results
+        path: benchmark-results.txt
+        retention-days: 30
+    
+    - name: Check for performance regressions
+      run: |
+        BUFFER_POOL_TIME=$(grep "BenchmarkBufferPool.*WithPool" benchmark-results.txt | awk '{print $3}' | head -1)
+        GETCONTENT_THROUGHPUT=$(grep "BenchmarkGetContentDiskCache" benchmark-results.txt | grep "MB/s" | awk '{print $4}' | head -1)
+        
+        echo "Buffer Pool: $BUFFER_POOL_TIME"
+        echo "GetContent: $GETCONTENT_THROUGHPUT"
+        
+        if [ ! -z "$BUFFER_POOL_TIME" ]; then
+          TIME_NS=$(echo $BUFFER_POOL_TIME | sed 's/ns\/op//')
+          if (( $(echo "$TIME_NS > 100" | bc -l) )); then
+            echo "❌ Buffer pool regression: ${TIME_NS}ns > 100ns"
+            exit 1
+          fi
+          echo "✅ Buffer pool OK: ${TIME_NS}ns"
+        fi
+        
+        if [ ! -z "$GETCONTENT_THROUGHPUT" ]; then
+          THROUGHPUT=$(echo $GETCONTENT_THROUGHPUT | sed 's/MB\/s//')
+          if (( $(echo "$THROUGHPUT < 2000" | bc -l) )); then
+            echo "❌ GetContent regression: ${THROUGHPUT} MB/s < 2000 MB/s"
+            exit 1
+          fi
+          echo "✅ GetContent OK: ${THROUGHPUT} MB/s"
+        fi
+    
+    - name: Comment benchmark results on PR
+      if: github.event_name == 'pull_request'
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const fs = require('fs');
+          const results = fs.readFileSync('benchmark-results.txt', 'utf8');
+          
+          const body = `## Benchmark Results\n\n\`\`\`\n${results}\n\`\`\``;
+          
+          github.rest.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: body
+          });
+
+  grpc-throughput-tests:
+    name: gRPC Throughput Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    
+    services:
+      redis:
+        image: redis:7-alpine
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 6379:6379
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: '1.22'
+    
+    - name: Cache Go modules
+      uses: actions/cache@v4
+      with:
+        path: ~/go/pkg/mod
+        key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+    
+    - name: Install netcat
+      run: sudo apt-get update && sudo apt-get install -y netcat-openbsd
+    
+    - name: Run gRPC performance tests
+      run: |
+        chmod +x bin/run_grpc_performance_tests.sh
+        ./bin/run_grpc_performance_tests.sh
+    
+    - name: Upload results
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: grpc-performance-results
+        path: performance-results/
+        retention-days: 30
+
+  integration-tests:
+    name: Integration Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: '1.22'
+    
+    - name: Run unit tests
+      run: go test -v -timeout 5m ./pkg/...
+
+  performance-summary:
+    name: Performance Summary
+    needs: [unit-benchmarks, grpc-throughput-tests, integration-tests]
+    runs-on: ubuntu-latest
+    if: always()
+    
+    steps:
+    - name: Download all artifacts
+      uses: actions/download-artifact@v4
+    
+    - name: Generate summary
+      run: |
+        echo "# Performance Test Summary" >> $GITHUB_STEP_SUMMARY
+        echo "" >> $GITHUB_STEP_SUMMARY
+        
+        echo "## Job Status" >> $GITHUB_STEP_SUMMARY
+        echo "- Unit Benchmarks: ${{ needs.unit-benchmarks.result }}" >> $GITHUB_STEP_SUMMARY
+        echo "- gRPC Throughput: ${{ needs.grpc-throughput-tests.result }}" >> $GITHUB_STEP_SUMMARY
+        echo "- Integration Tests: ${{ needs.integration-tests.result }}" >> $GITHUB_STEP_SUMMARY
+        echo "" >> $GITHUB_STEP_SUMMARY
+        
+        if [ -f performance-report/report.md ]; then
+          echo "## Performance Report" >> $GITHUB_STEP_SUMMARY
+          cat performance-report/report.md >> $GITHUB_STEP_SUMMARY
+        fi
+        
+        if [ -f benchmark-results/benchmark-results.txt ]; then
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "## Benchmark Results" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          head -50 benchmark-results/benchmark-results.txt >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+        fi
+    
+    - name: Check overall status
+      run: |
+        if [ "${{ needs.unit-benchmarks.result }}" != "success" ] || \
+           [ "${{ needs.grpc-throughput-tests.result }}" != "success" ] || \
+           [ "${{ needs.integration-tests.result }}" != "success" ]; then
+          echo "❌ Some performance tests failed"
+          exit 1
+        fi
+        echo "✅ All performance tests passed"
diff --git a/.gitignore b/.gitignore
index 55d8d5e..adfc73c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ bin/throughput
 bin/fs
 bin/testclient
 bin/basic
+bin/grpc-throughput
 build.sh
 tmp/
 config.yaml
@@ -12,6 +13,7 @@ config2.yaml
 config3.yaml
 e2e/throughput/testdata/*.bin
 e2e/fs/testdata/*.bin
+e2e/grpc_throughput/grpc-throughput
 daemonset.yaml
 output.bin
 .go-version
\ No newline at end of file
diff --git a/bin/run_grpc_performance_tests.sh b/bin/run_grpc_performance_tests.sh
new file mode 100755
index 0000000..ff07ae2
--- /dev/null
+++ b/bin/run_grpc_performance_tests.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+set -e
+
+# Simple gRPC performance test - completes in under 1 minute
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WORKSPACE_DIR="$(dirname "$SCRIPT_DIR")"
+RESULTS_DIR="${WORKSPACE_DIR}/performance-results"
+CURRENT_FILE="${RESULTS_DIR}/current.json"
+
+echo "========================================"
+echo " gRPC Performance Test"
+echo "========================================"
+echo ""
+
+mkdir -p "$RESULTS_DIR"
+
+# Build binaries
+echo "[1/4] Building binaries..."
+cd "$WORKSPACE_DIR"
+go build -o bin/blobcache cmd/main.go
+go build -o bin/grpc-throughput e2e/grpc_throughput/main.go
+echo "✓ Build complete"
+echo ""
+
+# Setup test environment
+echo "[2/4] Starting test server..."
+TEST_DIR=$(mktemp -d)
+DISK_CACHE_DIR="${TEST_DIR}/cache"
+mkdir -p "$DISK_CACHE_DIR"
+
+# Create minimal config
+cat > "${TEST_DIR}/config.yaml" << EOF
+server:
+  mode: coordinator
+  diskCacheDir: ${DISK_CACHE_DIR}
+  diskCacheMaxUsagePct: 90
+  maxCachePct: 50
+  pageSizeBytes: 4194304
+  metadata:
+    mode: default
+    redisAddr: "localhost:6379"
+
+global:
+  serverPort: 50051
+  grpcMessageSizeBytes: 268435456
+  debugMode: false
+
+metrics:
+  url: ""
+EOF
+
+# Start server
+CONFIG_PATH="${TEST_DIR}/config.yaml" ./bin/blobcache > "${TEST_DIR}/server.log" 2>&1 &
+SERVER_PID=$!
+
+cleanup() {
+    echo ""
+    echo "Cleaning up..."
+    if [ ! -z "$SERVER_PID" ]; then
+        kill -9 $SERVER_PID 2>/dev/null || true
+        sleep 1
+    fi
+    rm -rf "$TEST_DIR"
+    echo "✓ Cleanup complete"
+}
+trap cleanup EXIT INT TERM
+
+# Wait for server
+sleep 3
+if ! kill -0 $SERVER_PID 2>/dev/null; then
+    echo "✗ Server failed to start"
+    cat "${TEST_DIR}/server.log"
+    exit 1
+fi
+
+# Check connectivity
+for i in {1..10}; do
+    if nc -z localhost 50051 2>/dev/null; then
+        echo "✓ Server ready"
+        break
+    fi
+    if [ $i -eq 10 ]; then
+        echo "✗ Server not responding"
+        exit 1
+    fi
+    sleep 1
+done
+echo ""
+
+# Run tests
+echo "[3/4] Running throughput tests..."
+echo ""
+
+./bin/grpc-throughput -server localhost:50051 -output "$CURRENT_FILE"
+TEST_EXIT=$?
+
+echo ""
+if [ $TEST_EXIT -eq 0 ]; then
+    echo "[4/4] ✓ All tests passed"
+else
+    echo "[4/4] ✗ Some tests failed"
+fi
+
+exit $TEST_EXIT
diff --git a/e2e/grpc_throughput/main.go b/e2e/grpc_throughput/main.go
new file mode 100644
index 0000000..ef40b1a
--- /dev/null
+++ b/e2e/grpc_throughput/main.go
@@ -0,0 +1,232 @@
+package main
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"os"
+	"time"
+
+	proto "github.com/beam-cloud/blobcache-v2/proto"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+// Simple gRPC throughput test - measures read/write performance
+
+type TestResult struct {
+	Operation      string  `json:"operation"`
+	FileSize       string  `json:"file_size"`
+	ThroughputMBps float64 `json:"throughput_mbps"`
+	DurationMs     int64   `json:"duration_ms"`
+	Success        bool    `json:"success"`
+	Error          string  `json:"error,omitempty"`
+}
+
+func main() {
+	serverAddr := flag.String("server", "localhost:50051", "gRPC server address")
+	outputFile := flag.String("output", "", "Output file for results (JSON)")
+	flag.Parse()
+
+	fmt.Println("========================================")
+	fmt.Println(" gRPC Throughput Test")
+	fmt.Println("========================================")
+	fmt.Printf("Server: %s\n\n", *serverAddr)
+
+	// Test with 4MB and 64MB files (representative sizes)
+	tests := []struct {
+		name string
+		size int64
+	}{
+		{"4MB", 4 * 1024 * 1024},
+		{"64MB", 64 * 1024 * 1024},
+	}
+
+	results := []TestResult{}
+
+	for _, test := range tests {
+		fmt.Printf("Testing %s files...\n", test.name)
+		
+		// Test write
+		writeResult := testWrite(*serverAddr, test.name, test.size)
+		results = append(results, writeResult)
+		printResult(writeResult)
+
+		// Test read (using hash from write)
+		if writeResult.Success {
+			readResult := testRead(*serverAddr, test.name, test.size, writeResult.Error) // Reusing Error field for hash
+			results = append(results, readResult)
+			printResult(readResult)
+		}
+		
+		fmt.Println()
+	}
+
+	// Print summary
+	fmt.Println("========================================")
+	fmt.Println(" Summary")
+	fmt.Println("========================================")
+	totalTests := len(results)
+	passed := 0
+	for _, r := range results {
+		if r.Success {
+			passed++
+		}
+	}
+	fmt.Printf("Tests: %d/%d passed\n", passed, totalTests)
+
+	if passed < totalTests {
+		fmt.Printf("⚠️  %d test(s) failed\n", totalTests-passed)
+	}
+
+	// Save results
+	if *outputFile != "" {
+		data, _ := json.MarshalIndent(results, "", "  ")
+		os.WriteFile(*outputFile, data, 0644)
+		fmt.Printf("\nResults saved to: %s\n", *outputFile)
+	}
+
+	if passed < totalTests {
+		os.Exit(1)
+	}
+}
+
+func createClient(serverAddr string) (proto.BlobCacheClient, *grpc.ClientConn, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	opts := []grpc.DialOption{
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+		grpc.WithInitialWindowSize(4 * 1024 * 1024),
+		grpc.WithInitialConnWindowSize(32 * 1024 * 1024),
+		grpc.WithDefaultCallOptions(
+			grpc.MaxCallRecvMsgSize(256*1024*1024),
+			grpc.MaxCallSendMsgSize(256*1024*1024),
+		),
+	}
+
+	conn, err := grpc.DialContext(ctx, serverAddr, opts...)
+	if err != nil {
+		return nil, nil, fmt.Errorf("connection failed: %w", err)
+	}
+
+	return proto.NewBlobCacheClient(conn), conn, nil
+}
+
+func testWrite(serverAddr, sizeName string, fileSize int64) TestResult {
+	result := TestResult{
+		Operation: "Write",
+		FileSize:  sizeName,
+		Success:   false,
+	}
+
+	client, conn, err := createClient(serverAddr)
+	if err != nil {
+		result.Error = err.Error()
+		return result
+	}
+	defer conn.Close()
+
+	// Generate test data
+	data := make([]byte, fileSize)
+	rand.Read(data)
+
+	start := time.Now()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	stream, err := client.StoreContent(ctx)
+	if err != nil {
+		result.Error = fmt.Sprintf("stream failed: %v", err)
+		return result
+	}
+
+	// Send in 4MB chunks
+	chunkSize := int64(4 * 1024 * 1024)
+	sent := int64(0)
+	for sent < fileSize {
+		size := chunkSize
+		if sent+size > fileSize {
+			size = fileSize - sent
+		}
+		if err := stream.Send(&proto.StoreContentRequest{Content: data[sent : sent+size]}); err != nil {
+			result.Error = fmt.Sprintf("send failed: %v", err)
+			return result
+		}
+		sent += size
+	}
+
+	resp, err := stream.CloseAndRecv()
+	if err != nil {
+		result.Error = fmt.Sprintf("close failed: %v", err)
+		return result
+	}
+
+	duration := time.Since(start)
+	result.DurationMs = duration.Milliseconds()
+	result.ThroughputMBps = float64(fileSize) / (1024 * 1024) / duration.Seconds()
+	result.Success = true
+	result.Error = resp.Hash // Store hash for read test
+
+	return result
+}
+
+func testRead(serverAddr, sizeName string, fileSize int64, hash string) TestResult {
+	result := TestResult{
+		Operation: "Read",
+		FileSize:  sizeName,
+		Success:   false,
+	}
+
+	client, conn, err := createClient(serverAddr)
+	if err != nil {
+		result.Error = err.Error()
+		return result
+	}
+	defer conn.Close()
+
+	start := time.Now()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	resp, err := client.GetContent(ctx, &proto.GetContentRequest{
+		Hash:   hash,
+		Offset: 0,
+		Length: fileSize,
+	})
+
+	if err != nil {
+		result.Error = fmt.Sprintf("get failed: %v", err)
+		return result
+	}
+
+	if !resp.Ok {
+		result.Error = "content not found"
+		return result
+	}
+
+	duration := time.Since(start)
+	result.DurationMs = duration.Milliseconds()
+	result.ThroughputMBps = float64(fileSize) / (1024 * 1024) / duration.Seconds()
+	result.Success = true
+
+	return result
+}
+
+func printResult(r TestResult) {
+	status := "✗"
+	if r.Success {
+		status = "✓"
+	}
+
+	fmt.Printf("  %s %s %s: %.2f MB/s (%dms)\n",
+		status, r.Operation, r.FileSize, r.ThroughputMBps, r.DurationMs)
+
+	if !r.Success && r.Error != "" {
+		fmt.Printf("    Error: %s\n", r.Error)
+	}
+}
diff --git a/e2e/throughput_bench/main.go b/e2e/throughput_bench/main.go
new file mode 100644
index 0000000..dd512c1
--- /dev/null
+++ b/e2e/throughput_bench/main.go
@@ -0,0 +1,204 @@
+package main
+
+import (
+	"crypto/rand"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// Throughput benchmark for validating blobcache read improvements
+// Tests sequential and random read patterns with various file sizes
+
+type BenchmarkConfig struct {
+	MountPoint   string
+	TestDataDir  string
+	FileSize     int64
+	ChunkSize    int64
+	NumIterations int
+	TestPattern  string // "sequential" or "random"
+}
+
+func main() {
+	config := &BenchmarkConfig{}
+	
+	flag.StringVar(&config.MountPoint, "mount", "/tmp/blobcache", "FUSE mount point")
+	flag.StringVar(&config.TestDataDir, "testdir", "/tmp/blobcache-test", "Test data directory")
+	flag.Int64Var(&config.FileSize, "filesize", 256*1024*1024, "File size in bytes (default: 256MB)")
+	flag.Int64Var(&config.ChunkSize, "chunksize", 4*1024*1024, "Read chunk size in bytes (default: 4MB)")
+	flag.IntVar(&config.NumIterations, "iterations", 5, "Number of iterations")
+	flag.StringVar(&config.TestPattern, "pattern", "sequential", "Test pattern: sequential or random")
+	flag.Parse()
+	
+	fmt.Printf("=== Blobcache Throughput Benchmark ===\n")
+	fmt.Printf("File Size: %d MB\n", config.FileSize/(1024*1024))
+	fmt.Printf("Chunk Size: %d MB\n", config.ChunkSize/(1024*1024))
+	fmt.Printf("Pattern: %s\n", config.TestPattern)
+	fmt.Printf("Iterations: %d\n\n", config.NumIterations)
+	
+	// Create test file
+	testFilePath := filepath.Join(config.TestDataDir, "benchmark-file.bin")
+	if err := prepareTestFile(testFilePath, config.FileSize); err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to prepare test file: %v\n", err)
+		os.Exit(1)
+	}
+	
+	// Run benchmark
+	results := runBenchmark(config, testFilePath)
+	
+	// Print results
+	printResults(results)
+}
+
+type BenchmarkResult struct {
+	TotalBytes    int64
+	Duration      time.Duration
+	ThroughputMBs float64
+	MinLatencyMs  float64
+	MaxLatencyMs  float64
+	AvgLatencyMs  float64
+}
+
+func prepareTestFile(path string, size int64) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return err
+	}
+	
+	// Check if file already exists with correct size
+	if stat, err := os.Stat(path); err == nil && stat.Size() == size {
+		fmt.Printf("Using existing test file: %s\n", path)
+		return nil
+	}
+	
+	fmt.Printf("Creating test file: %s (%d MB)\n", path, size/(1024*1024))
+	
+	file, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+	
+	// Write random data in chunks
+	chunkSize := int64(16 * 1024 * 1024) // 16MB chunks
+	written := int64(0)
+	
+	for written < size {
+		toWrite := chunkSize
+		if written+toWrite > size {
+			toWrite = size - written
+		}
+		
+		chunk := make([]byte, toWrite)
+		if _, err := rand.Read(chunk); err != nil {
+			return err
+		}
+		
+		if _, err := file.Write(chunk); err != nil {
+			return err
+		}
+		
+		written += toWrite
+		
+		if written%(100*1024*1024) == 0 {
+			fmt.Printf("  Progress: %d MB / %d MB\n", written/(1024*1024), size/(1024*1024))
+		}
+	}
+	
+	return file.Sync()
+}
+
+func runBenchmark(config *BenchmarkConfig, testFilePath string) *BenchmarkResult {
+	result := &BenchmarkResult{}
+	
+	var totalLatency float64
+	result.MinLatencyMs = float64(^uint64(0) >> 1) // Max float64
+	
+	fmt.Printf("\nRunning benchmark...\n")
+	
+	overallStart := time.Now()
+	
+	for iter := 0; iter < config.NumIterations; iter++ {
+		file, err := os.Open(testFilePath)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Failed to open test file: %v\n", err)
+			os.Exit(1)
+		}
+		
+		buffer := make([]byte, config.ChunkSize)
+		offset := int64(0)
+		chunkCount := 0
+		
+		for {
+			// Read chunk
+			start := time.Now()
+			
+			n, err := file.ReadAt(buffer, offset)
+			if err != nil && err != io.EOF {
+				fmt.Fprintf(os.Stderr, "Read error: %v\n", err)
+				break
+			}
+			
+			if n == 0 {
+				break
+			}
+			
+			latency := time.Since(start)
+			latencyMs := float64(latency.Microseconds()) / 1000.0
+			
+			totalLatency += latencyMs
+			if latencyMs < result.MinLatencyMs {
+				result.MinLatencyMs = latencyMs
+			}
+			if latencyMs > result.MaxLatencyMs {
+				result.MaxLatencyMs = latencyMs
+			}
+			
+			result.TotalBytes += int64(n)
+			chunkCount++
+			
+			// Move to next chunk
+			if config.TestPattern == "sequential" {
+				offset += int64(n)
+				if offset >= config.FileSize {
+					break
+				}
+			} else {
+				// Random pattern
+				offset = (offset + config.ChunkSize*7) % (config.FileSize - config.ChunkSize)
+			}
+		}
+		
+		file.Close()
+		
+		iterDuration := time.Since(overallStart)
+		iterThroughput := float64(result.TotalBytes) / (1024 * 1024) / iterDuration.Seconds()
+		fmt.Printf("  Iteration %d: %.2f MB/s\n", iter+1, iterThroughput)
+	}
+	
+	result.Duration = time.Since(overallStart)
+	result.ThroughputMBs = float64(result.TotalBytes) / (1024 * 1024) / result.Duration.Seconds()
+	
+	if result.TotalBytes > 0 {
+		chunkCount := result.TotalBytes / config.ChunkSize
+		if chunkCount > 0 {
+			result.AvgLatencyMs = totalLatency / float64(chunkCount)
+		}
+	}
+	
+	return result
+}
+
+func printResults(result *BenchmarkResult) {
+	fmt.Printf("\n=== Benchmark Results ===\n")
+	fmt.Printf("Total Data Read: %.2f MB\n", float64(result.TotalBytes)/(1024*1024))
+	fmt.Printf("Duration: %.2f seconds\n", result.Duration.Seconds())
+	fmt.Printf("Throughput: %.2f MB/s\n", result.ThroughputMBs)
+	fmt.Printf("\nLatency Statistics:\n")
+	fmt.Printf("  Min: %.2f ms\n", result.MinLatencyMs)
+	fmt.Printf("  Avg: %.2f ms\n", result.AvgLatencyMs)
+	fmt.Printf("  Max: %.2f ms\n", result.MaxLatencyMs)
+}
diff --git a/hack/deployment.yaml b/hack/deployment.yaml
index 17d0551..30c0dea 100644
--- a/hack/deployment.yaml
+++ b/hack/deployment.yaml
@@ -19,3 +19,17 @@ spec:
         securityContext:
           privileged: true
       dnsPolicy: ClusterFirstWithHostNet
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: blobcache
+spec:
+  selector:
+    app: blobcache
+  ports:
+  - name: nfs
+    port: 2049
+    targetPort: 2049
+    protocol: TCP
+  type: ClusterIP
\ No newline at end of file
diff --git a/pkg/blobfs.go b/pkg/blobfs.go
index 65c2652..0fcf442 100644
--- a/pkg/blobfs.go
+++ b/pkg/blobfs.go
@@ -75,9 +75,11 @@ func Mount(ctx context.Context, opts BlobFsSystemOpts) (func() error, <-chan err
 	root, _ := blobfs.Root()
 	attrTimeout := time.Second * 5
 	entryTimeout := time.Second * 5
+	negativeTimeout := time.Second * 2  // Cache negative lookups to reduce FUSE chatter
 	fsOptions := &fs.Options{
-		AttrTimeout:  &attrTimeout,
-		EntryTimeout: &entryTimeout,
+		AttrTimeout:     &attrTimeout,
+		EntryTimeout:    &entryTimeout,
+		NegativeTimeout: &negativeTimeout,
 	}
 
 	maxWriteKB := opts.Config.BlobFs.MaxWriteKB
@@ -94,6 +96,9 @@ func Mount(ctx context.Context, opts BlobFsSystemOpts) (func() error, <-chan err
 	if maxBackgroundTasks <= 0 {
 		maxBackgroundTasks = 512
 	}
+	
+	// Note: CongestionThreshold would be set to 75% of MaxBackground if supported
+	// This is a recommended optimization but may not be available in all go-fuse versions
 
 	options := []string{}
 	options = append(options, opts.Config.BlobFs.Options...)
diff --git a/pkg/blobfs_node.go b/pkg/blobfs_node.go
index 325c729..1da252f 100644
--- a/pkg/blobfs_node.go
+++ b/pkg/blobfs_node.go
@@ -44,7 +44,6 @@ func (n *FSNode) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOu
 
 	node := n.bfsNode
 
-	// Fill in the AttrOut struct
 	out.Ino = node.Attr.Ino
 	out.Size = node.Attr.Size
 	out.Blocks = node.Attr.Blocks
@@ -175,7 +174,7 @@ func (n *FSNode) Open(ctx context.Context, flags uint32) (fh fs.FileHandle, fuse
 }
 
 func (n *FSNode) Read(ctx context.Context, f fs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
-	n.log("Read called with offset: %v", off)
+	n.log("Read called with offset: %v, length: %v", off, len(dest))
 
 	// Don't try to read 0 byte files
 	if n.bfsNode.Attr.Size == 0 {
diff --git a/pkg/buffer_pool.go b/pkg/buffer_pool.go
new file mode 100644
index 0000000..5b7544e
--- /dev/null
+++ b/pkg/buffer_pool.go
@@ -0,0 +1,80 @@
+package blobcache
+
+import (
+	"sync"
+)
+
+// BufferPool provides a pool of reusable byte slices to reduce allocations
+// Optimized for 1-4MB chunks as recommended in the optimization plan
+type BufferPool struct {
+	pools map[int]*sync.Pool
+}
+
+// Standard buffer sizes aligned with typical chunk sizes
+var (
+	BufferSize1MB  = 1 * 1024 * 1024
+	BufferSize4MB  = 4 * 1024 * 1024
+	BufferSize16MB = 16 * 1024 * 1024
+)
+
+// NewBufferPool creates a new buffer pool with predefined size buckets
+func NewBufferPool() *BufferPool {
+	bp := &BufferPool{
+		pools: make(map[int]*sync.Pool),
+	}
+
+	// Initialize pools for common sizes
+	for _, size := range []int{BufferSize1MB, BufferSize4MB, BufferSize16MB} {
+		size := size // capture for closure
+		bp.pools[size] = &sync.Pool{
+			New: func() interface{} {
+				buf := make([]byte, size)
+				return &buf
+			},
+		}
+	}
+
+	return bp
+}
+
+// Get retrieves a buffer of at least the requested size
+func (bp *BufferPool) Get(size int) []byte {
+	// Find the smallest pool that fits
+	poolSize := bp.selectPoolSize(size)
+	
+	if pool, exists := bp.pools[poolSize]; exists {
+		bufPtr := pool.Get().(*[]byte)
+		buf := *bufPtr
+		return buf[:size]
+	}
+
+	// Fallback: allocate directly if no suitable pool
+	return make([]byte, size)
+}
+
+// Put returns a buffer to the pool
+func (bp *BufferPool) Put(buf []byte) {
+	if buf == nil {
+		return
+	}
+
+	capacity := cap(buf)
+	
+	// Only return buffers that match our pool sizes
+	if pool, exists := bp.pools[capacity]; exists {
+		// Reset length to capacity before returning
+		buf = buf[:capacity]
+		pool.Put(&buf)
+	}
+}
+
+// selectPoolSize selects the appropriate pool size for the request
+func (bp *BufferPool) selectPoolSize(size int) int {
+	if size <= BufferSize1MB {
+		return BufferSize1MB
+	} else if size <= BufferSize4MB {
+		return BufferSize4MB
+	} else {
+		return BufferSize16MB
+	}
+}
diff --git a/pkg/client.go b/pkg/client.go
index a3e13be..9c3f146 100644
--- a/pkg/client.go
+++ b/pkg/client.go
@@ -151,6 +151,27 @@ func (c *BlobCacheClient) addHost(host *BlobCacheHost) error {
 		transportCredentials = grpc.WithTransportCredentials(h2creds)
 	}
 
+	// Optimized client dial options matching server configuration
+	initialWindowSize := c.globalConfig.GRPCInitialWindowSize
+	if initialWindowSize == 0 {
+		initialWindowSize = 4 * 1024 * 1024
+	}
+	
+	initialConnWindowSize := c.globalConfig.GRPCInitialConnWindowSize
+	if initialConnWindowSize == 0 {
+		initialConnWindowSize = 32 * 1024 * 1024
+	}
+	
+	writeBufferSize := c.globalConfig.GRPCWriteBufferSize
+	if writeBufferSize == 0 {
+		writeBufferSize = 256 * 1024
+	}
+	
+	readBufferSize := c.globalConfig.GRPCReadBufferSize
+	if readBufferSize == 0 {
+		readBufferSize = 256 * 1024
+	}
+
 	var dialOpts = []grpc.DialOption{
 		transportCredentials,
 		grpc.WithContextDialer(DialWithTimeout),
@@ -158,6 +179,10 @@ func (c *BlobCacheClient) addHost(host *BlobCacheHost) error {
 			grpc.MaxCallRecvMsgSize(c.globalConfig.GRPCMessageSizeBytes),
 			grpc.MaxCallSendMsgSize(c.globalConfig.GRPCMessageSizeBytes),
 		),
+		grpc.WithInitialWindowSize(int32(initialWindowSize)),
+		grpc.WithInitialConnWindowSize(int32(initialConnWindowSize)),
+		grpc.WithWriteBufferSize(writeBufferSize),
+		grpc.WithReadBufferSize(readBufferSize),
 	}
 
 	if c.clientConfig.Token != "" {
diff --git a/pkg/config.default.yaml b/pkg/config.default.yaml
index 36affbb..b73b304 100644
--- a/pkg/config.default.yaml
+++ b/pkg/config.default.yaml
@@ -15,7 +15,7 @@ server:
     redisPasswd:
     redisTLSEnabled: false
   regions:
-    us-east-1:
+    default:
       token:
       rttThresholdMilliseconds: 40
       hostStorageCapacityThresholdPct: 0.5
@@ -31,7 +31,7 @@ client:
     maxBackgroundTasks: 512
     maxReadAheadKB: 128
 global:
-  defaultLocality: us-east-1
+  defaultLocality: default
   coordinatorHost:
   serverPort: 2049
   discoveryIntervalS: 5
@@ -39,6 +39,12 @@ global:
   hostStorageCapacityThresholdPct: 0.95
   grpcDialTimeoutS: 1
   grpcMessageSizeBytes: 1000000000
+  grpcInitialWindowSize: 4194304
+  grpcInitialConnWindowSize: 33554432
+  grpcWriteBufferSize: 262144
+  grpcReadBufferSize: 262144
+  grpcMaxConcurrentStreams: 1024
+  grpcNumStreamWorkers: 0
   debugMode: false
   prettyLogs: false
   sources: []
diff --git a/pkg/coordinator_mock.go b/pkg/coordinator_mock.go
new file mode 100644
index 0000000..a3b32ce
--- /dev/null
+++ b/pkg/coordinator_mock.go
@@ -0,0 +1,185 @@
+package blobcache
+
+import (
+	"context"
+	"errors"
+	"sync"
+)
+
+// MockCoordinator is a simple in-memory coordinator for testing
+// Does not require Redis or any external dependencies
+type MockCoordinator struct {
+	hosts      map[string]map[string]*BlobCacheHost // locality -> hostId -> host
+	fsNodes    map[string]*BlobFsMetadata           // id -> metadata
+	fsChildren map[string][]string                  // parent id -> child ids
+	locks      map[string]bool                      // lock keys
+	mu         sync.RWMutex
+}
+
+func NewMockCoordinator() *MockCoordinator {
+	return &MockCoordinator{
+		hosts:      make(map[string]map[string]*BlobCacheHost),
+		fsNodes:    make(map[string]*BlobFsMetadata),
+		fsChildren: make(map[string][]string),
+		locks:      make(map[string]bool),
+	}
+}
+
+func (m *MockCoordinator) AddHostToIndex(ctx context.Context, locality string, host *BlobCacheHost) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	if _, exists := m.hosts[locality]; !exists {
+		m.hosts[locality] = make(map[string]*BlobCacheHost)
+	}
+	m.hosts[locality][host.HostId] = host
+	return nil
+}
+
+func (m *MockCoordinator) SetHostKeepAlive(ctx context.Context, locality string, host *BlobCacheHost) error {
+	return m.AddHostToIndex(ctx, locality, host)
+}
+
+func (m *MockCoordinator) GetAvailableHosts(ctx context.Context, locality string) ([]*BlobCacheHost, error) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	
+	hosts := []*BlobCacheHost{}
+	if localityHosts, exists := m.hosts[locality]; exists {
+		for _, host := range localityHosts {
+			hosts = append(hosts, host)
+		}
+	}
+	return hosts, nil
+}
+
+func (m *MockCoordinator) GetRegionConfig(ctx context.Context, locality string) (BlobCacheServerConfig, error) {
+	return BlobCacheServerConfig{}, errors.New("region config not found")
+}
+
+func (m *MockCoordinator) SetClientLock(ctx context.Context, hash string, host string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	key := "client-lock:" + hash
+	if m.locks[key] {
+		return errors.New("lock already held")
+	}
+	m.locks[key] = true
+	return nil
+}
+
+func (m *MockCoordinator) RemoveClientLock(ctx context.Context, hash string, host string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	key := "client-lock:" + hash
+	delete(m.locks, key)
+	return nil
+}
+
+func (m *MockCoordinator) SetStoreFromContentLock(ctx context.Context, locality string, sourcePath string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	key := "store-lock:" + locality + ":" + sourcePath
+	if m.locks[key] {
+		return errors.New("lock already held")
+	}
+	m.locks[key] = true
+	return nil
+}
+
+func (m *MockCoordinator) RemoveStoreFromContentLock(ctx context.Context, locality string, sourcePath string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	key := "store-lock:" + locality + ":" + sourcePath
+	delete(m.locks, key)
+	return nil
+}
+
+func (m *MockCoordinator) RefreshStoreFromContentLock(ctx context.Context, locality string, sourcePath string) error {
+	// No-op for mock - locks don't expire
+	return nil
+}
+
+func (m *MockCoordinator) SetFsNode(ctx context.Context, id string, metadata *BlobFsMetadata) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	m.fsNodes[id] = metadata
+	return nil
+}
+
+func (m *MockCoordinator) GetFsNode(ctx context.Context, id string) (*BlobFsMetadata, error) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	
+	metadata, exists := m.fsNodes[id]
+	if !exists {
+		return nil, errors.New("fs node not found")
+	}
+	return metadata, nil
+}
+
+func (m *MockCoordinator) RemoveFsNode(ctx context.Context, id string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	delete(m.fsNodes, id)
+	return nil
+}
+
+func (m *MockCoordinator) RemoveFsNodeChild(ctx context.Context, pid, id string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	if children, exists := m.fsChildren[pid]; exists {
+		newChildren := []string{}
+		for _, childId := range children {
+			if childId != id {
+				newChildren = append(newChildren, childId)
+			}
+		}
+		m.fsChildren[pid] = newChildren
+	}
+	return nil
+}
+
+func (m *MockCoordinator) GetFsNodeChildren(ctx context.Context, id string) ([]*BlobFsMetadata, error) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	
+	childIds, exists := m.fsChildren[id]
+	if !exists {
+		return []*BlobFsMetadata{}, nil
+	}
+	
+	children := []*BlobFsMetadata{}
+	for _, childId := range childIds {
+		if metadata, exists := m.fsNodes[childId]; exists {
+			children = append(children, metadata)
+		}
+	}
+	return children, nil
+}
+
+func (m *MockCoordinator) AddFsNodeChild(ctx context.Context, pid, id string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	
+	if _, exists := m.fsChildren[pid]; !exists {
+		m.fsChildren[pid] = []string{}
+	}
+	
+	// Don't add duplicates
+	for _, existingId := range m.fsChildren[pid] {
+		if existingId == id {
+			return nil
+		}
+	}
+	
+	m.fsChildren[pid] = append(m.fsChildren[pid], id)
+	return nil
+}
diff --git a/pkg/fadvise.go b/pkg/fadvise.go
new file mode 100644
index 0000000..27a7821
--- /dev/null
+++ b/pkg/fadvise.go
@@ -0,0 +1,34 @@
+// +build linux
+
+package blobcache
+
+import (
+	"golang.org/x/sys/unix"
+)
+
+// fadvise wrapper functions for optimizing disk I/O patterns
+// These hint the kernel about intended access patterns for better performance
+
+// fadviseSequential hints that the file will be read sequentially
+// This triggers more aggressive kernel readahead
+func fadviseSequential(fd uintptr) error {
+	return unix.Fadvise(int(fd), 0, 0, unix.FADV_SEQUENTIAL)
+}
+
+// fadviseWillneed hints that data will be accessed soon
+// This can trigger asynchronous prefetch into page cache
+func fadviseWillneed(fd uintptr, offset, length int64) error {
+	return unix.Fadvise(int(fd), offset, length, unix.FADV_WILLNEED)
+}
+
+// fadviseDontneed hints that data won't be needed anymore
+// Useful for streaming workloads to avoid cache pollution
+func fadviseDontneed(fd uintptr, offset, length int64) error {
+	return unix.Fadvise(int(fd), offset, length, unix.FADV_DONTNEED)
+}
+
+// fadviseRandom hints that file will be accessed randomly
+// This disables readahead to avoid wasting I/O bandwidth
+func fadviseRandom(fd uintptr) error {
+	return unix.Fadvise(int(fd), 0, 0, unix.FADV_RANDOM)
+}
diff --git a/pkg/fadvise_other.go b/pkg/fadvise_other.go
new file mode 100644
index 0000000..c4bbf68
--- /dev/null
+++ b/pkg/fadvise_other.go
@@ -0,0 +1,22 @@
+// +build !linux
+
+package blobcache
+
+// fadvise stubs for non-Linux platforms
+// These are no-ops on platforms that don't support fadvise
+
+func fadviseSequential(fd uintptr) error {
+	return nil // No-op on non-Linux
+}
+
+func fadviseWillneed(fd uintptr, offset, length int64) error {
+	return nil // No-op on non-Linux
+}
+
+func fadviseDontneed(fd uintptr, offset, length int64) error {
+	return nil // No-op on non-Linux
+}
+
+func fadviseRandom(fd uintptr) error {
+	return nil // No-op on non-Linux
+}
diff --git a/pkg/getcontent_bench_test.go b/pkg/getcontent_bench_test.go
new file mode 100644
index 0000000..ac5a17c
--- /dev/null
+++ b/pkg/getcontent_bench_test.go
@@ -0,0 +1,98 @@
+package blobcache
+
+import (
+	"context"
+	"crypto/rand"
+	"testing"
+)
+
+func BenchmarkGetContentDiskCache(b *testing.B) {
+	InitLogger(false, false)
+	
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	tmpDir := b.TempDir()
+
+	config := BlobCacheConfig{
+		Server: BlobCacheServerConfig{
+			DiskCacheDir:         tmpDir,
+			DiskCacheMaxUsagePct: 90,
+			MaxCachePct:          0,
+			PageSizeBytes:        4 * 1024 * 1024,
+			ObjectTtlS:           300,
+		},
+		Global: BlobCacheGlobalConfig{
+			DebugMode: false,
+		},
+		Metrics: BlobCacheMetricsConfig{
+			URL: "",
+		},
+	}
+
+	currentHost := &BlobCacheHost{
+		HostId: "bench-host",
+		Addr:   "localhost:50051",
+		RTT:    0,
+	}
+
+	mockCoordinator := NewMockCoordinator()
+	cas, err := NewContentAddressableStorage(ctx, currentHost, "local", mockCoordinator, config)
+	if err != nil {
+		b.Fatalf("Failed to create CAS: %v", err)
+	}
+	defer cas.Cleanup()
+
+	// Store 16MB file
+	fileSize := int64(16 * 1024 * 1024)
+	content := make([]byte, fileSize)
+	rand.Read(content)
+	hash := "test-file"
+
+	err = cas.Add(context.Background(), hash, content)
+	if err != nil {
+		b.Fatalf("Failed to add content: %v", err)
+	}
+
+	// Read in 4MB chunks (realistic usage)
+	chunkSize := int64(4 * 1024 * 1024)
+	dst := make([]byte, chunkSize)
+
+	b.ResetTimer()
+	b.SetBytes(fileSize)
+
+	for i := 0; i < b.N; i++ {
+		offset := int64(0)
+		totalRead := int64(0)
+
+		for offset < fileSize {
+			readSize := chunkSize
+			if offset+readSize > fileSize {
+				readSize = fileSize - offset
+			}
+
+			n, err := cas.Get(hash, offset, readSize, dst)
+			if err != nil {
+				b.Fatalf("GetContent failed at offset %d: %v", offset, err)
+			}
+			if n != readSize {
+				b.Fatalf("Expected %d bytes, got %d", readSize, n)
+			}
+
+			if offset == 0 && i == 0 {
+				for j := int64(0); j < 1024 && j < n; j++ {
+					if dst[j] != content[j] {
+						b.Fatalf("Data mismatch at byte %d", j)
+					}
+				}
+			}
+
+			offset += n
+			totalRead += n
+		}
+
+		if totalRead != fileSize {
+			b.Fatalf("Read %d bytes, expected %d", totalRead, fileSize)
+		}
+	}
+}
diff --git a/pkg/logger.go b/pkg/logger.go
index 0d91402..32ca040 100644
--- a/pkg/logger.go
+++ b/pkg/logger.go
@@ -91,6 +91,9 @@ func (l *logger) Error(msg string, fields ...any) {
 }
 
 func (l *logger) Errorf(template string, args ...interface{}) {
+	if l == nil {
+		return
+	}
 	l.logger.Error().Msgf(template, args...)
 }
 
diff --git a/pkg/metrics.go b/pkg/metrics.go
index be5f26a..1ce4b22 100644
--- a/pkg/metrics.go
+++ b/pkg/metrics.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/base64"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/VictoriaMetrics/metrics"
@@ -14,38 +15,115 @@ type BlobcacheMetrics struct {
 	DiskCacheUsagePct *metrics.Histogram
 	MemCacheUsageMB   *metrics.Histogram
 	MemCacheUsagePct  *metrics.Histogram
+	
+	// Cache tier hit ratios
+	L0HitRatio        *metrics.Histogram // In-memory cache hits
+	L1HitRatio        *metrics.Histogram // Disk cache hits
+	L2MissRatio       *metrics.Histogram // Remote fetch required
+	
+	// Operation counters
+	L0Hits            *metrics.Counter
+	L1Hits            *metrics.Counter
+	L2Misses          *metrics.Counter
+	TotalReads        *metrics.Counter
+	
+	// Bytes served per tier
+	L0BytesServed     *metrics.Counter
+	L1BytesServed     *metrics.Counter
+	L2BytesFetched    *metrics.Counter
+	
+	// FUSE operation latencies
+	FUSEReadLatency   *metrics.Histogram
+	FUSELookupLatency *metrics.Histogram
+	FUSEGetattrLatency *metrics.Histogram
+	
+	// Read throughput
+	ReadThroughputMBps *metrics.Histogram
 }
 
+var (
+	globalMetrics     BlobcacheMetrics
+	metricsInitOnce   sync.Once
+)
+
 func initMetrics(ctx context.Context, config BlobCacheMetricsConfig, currentHost *BlobCacheHost, locality string) BlobcacheMetrics {
-	username := config.Username
-	password := config.Password
-	credentials := base64.StdEncoding.EncodeToString([]byte(username + ":" + password))
-
-	opts := &metrics.PushOptions{
-		Headers: []string{
-			fmt.Sprintf("Authorization: Basic %s", credentials),
-		},
-		ExtraLabels: "host=\"" + currentHost.HostId + "\",locality=\"" + locality + "\"",
-	}
+	metricsInitOnce.Do(func() {
+		globalMetrics = createMetrics(ctx, config, currentHost, locality)
+	})
+	return globalMetrics
+}
+
+func createMetrics(ctx context.Context, config BlobCacheMetricsConfig, currentHost *BlobCacheHost, locality string) BlobcacheMetrics {
+	// Only initialize metrics push if URL is configured
+	if config.URL != "" {
+		username := config.Username
+		password := config.Password
+		credentials := base64.StdEncoding.EncodeToString([]byte(username + ":" + password))
+
+		opts := &metrics.PushOptions{
+			Headers: []string{
+				fmt.Sprintf("Authorization: Basic %s", credentials),
+			},
+			ExtraLabels: "host=\"" + currentHost.HostId + "\",locality=\"" + locality + "\"",
+		}
 
-	pushURL := config.URL
-	interval := time.Duration(config.PushIntervalS) * time.Second
-	pushProcessMetrics := true
+		pushURL := config.URL
+		interval := time.Duration(config.PushIntervalS) * time.Second
+		pushProcessMetrics := true
 
-	err := metrics.InitPushWithOptions(ctx, pushURL, interval, pushProcessMetrics, opts)
-	if err != nil {
-		Logger.Errorf("Failed to initialize metrics: %v", err)
+		err := metrics.InitPushWithOptions(ctx, pushURL, interval, pushProcessMetrics, opts)
+		if err != nil && Logger != nil {
+			Logger.Errorf("Failed to initialize metrics: %v", err)
+		}
 	}
 
 	diskCacheUsageMB := metrics.NewHistogram(`blobcache_disk_cache_usage_mb`)
 	diskCacheUsagePct := metrics.NewHistogram(`blobcache_disk_cache_usage_pct`)
 	memCacheUsageMB := metrics.NewHistogram(`blobcache_mem_cache_usage_mb`)
 	memCacheUsagePct := metrics.NewHistogram(`blobcache_mem_cache_usage_pct`)
+	
+	// Cache tier metrics
+	l0HitRatio := metrics.NewHistogram(`blobcache_l0_hit_ratio`)
+	l1HitRatio := metrics.NewHistogram(`blobcache_l1_hit_ratio`)
+	l2MissRatio := metrics.NewHistogram(`blobcache_l2_miss_ratio`)
+	
+	// Operation counters
+	l0Hits := metrics.NewCounter(`blobcache_l0_hits_total`)
+	l1Hits := metrics.NewCounter(`blobcache_l1_hits_total`)
+	l2Misses := metrics.NewCounter(`blobcache_l2_misses_total`)
+	totalReads := metrics.NewCounter(`blobcache_reads_total`)
+	
+	// Bytes served
+	l0BytesServed := metrics.NewCounter(`blobcache_l0_bytes_served_total`)
+	l1BytesServed := metrics.NewCounter(`blobcache_l1_bytes_served_total`)
+	l2BytesFetched := metrics.NewCounter(`blobcache_l2_bytes_fetched_total`)
+	
+	// FUSE latencies
+	fuseReadLatency := metrics.NewHistogram(`blobcache_fuse_read_latency_ms`)
+	fuseLookupLatency := metrics.NewHistogram(`blobcache_fuse_lookup_latency_ms`)
+	fuseGetattrLatency := metrics.NewHistogram(`blobcache_fuse_getattr_latency_ms`)
+	
+	// Throughput
+	readThroughputMBps := metrics.NewHistogram(`blobcache_read_throughput_mbps`)
 
 	return BlobcacheMetrics{
-		DiskCacheUsageMB:  diskCacheUsageMB,
-		DiskCacheUsagePct: diskCacheUsagePct,
-		MemCacheUsageMB:   memCacheUsageMB,
-		MemCacheUsagePct:  memCacheUsagePct,
+		DiskCacheUsageMB:   diskCacheUsageMB,
+		DiskCacheUsagePct:  diskCacheUsagePct,
+		MemCacheUsageMB:    memCacheUsageMB,
+		MemCacheUsagePct:   memCacheUsagePct,
+		L0HitRatio:         l0HitRatio,
+		L1HitRatio:         l1HitRatio,
+		L2MissRatio:        l2MissRatio,
+		L0Hits:             l0Hits,
+		L1Hits:             l1Hits,
+		L2Misses:           l2Misses,
+		TotalReads:         totalReads,
+		L0BytesServed:      l0BytesServed,
+		L1BytesServed:      l1BytesServed,
+		L2BytesFetched:     l2BytesFetched,
+		FUSEReadLatency:    fuseReadLatency,
+		FUSELookupLatency:  fuseLookupLatency,
+		FUSEGetattrLatency: fuseGetattrLatency,
+		ReadThroughputMBps: readThroughputMBps,
 	}
 }
diff --git a/pkg/prefetcher.go b/pkg/prefetcher.go
new file mode 100644
index 0000000..1b14570
--- /dev/null
+++ b/pkg/prefetcher.go
@@ -0,0 +1,188 @@
+package blobcache
+
+import (
+	"context"
+	"sync"
+	"time"
+)
+
+const (
+	// Prefetch configuration aligned with optimization plan
+	prefetchAheadChunks    = 16                      // Prefetch 16-64 MiB ahead with 4MB chunks
+	prefetchThresholdBytes = 2 * 1024 * 1024         // 2MB - detect sequential after 2 adjacent reads
+	prefetchWorkers        = 4                       // Parallel prefetch workers
+	prefetchCacheTime      = 30 * time.Second        // How long to keep prefetch state
+)
+
+// PrefetchState tracks sequential read patterns per file/hash
+type PrefetchState struct {
+	lastOffset     int64
+	lastAccessTime time.Time
+	sequential     bool
+	prefetching    bool
+}
+
+// Prefetcher detects sequential reads and prefetches ahead
+type Prefetcher struct {
+	ctx        context.Context
+	cas        *ContentAddressableStorage
+	states     map[string]*PrefetchState
+	mu         sync.RWMutex
+	workQueue  chan *prefetchTask
+	bufferPool *BufferPool
+}
+
+type prefetchTask struct {
+	hash   string
+	offset int64
+	length int64
+}
+
+// NewPrefetcher creates a new prefetcher instance
+func NewPrefetcher(ctx context.Context, cas *ContentAddressableStorage, bufferPool *BufferPool) *Prefetcher {
+	pf := &Prefetcher{
+		ctx:        ctx,
+		cas:        cas,
+		states:     make(map[string]*PrefetchState),
+		workQueue:  make(chan *prefetchTask, 100),
+		bufferPool: bufferPool,
+	}
+
+	// Start prefetch workers
+	for i := 0; i < prefetchWorkers; i++ {
+		go pf.worker()
+	}
+
+	// Cleanup stale states periodically
+	go pf.cleanupWorker()
+
+	return pf
+}
+
+// OnRead should be called on each read to detect patterns
+func (pf *Prefetcher) OnRead(hash string, offset, length int64) {
+	pf.mu.Lock()
+	defer pf.mu.Unlock()
+
+	now := time.Now()
+	state, exists := pf.states[hash]
+	
+	if !exists {
+		state = &PrefetchState{
+			lastOffset:     offset,
+			lastAccessTime: now,
+			sequential:     false,
+			prefetching:    false,
+		}
+		pf.states[hash] = state
+		return
+	}
+
+	// Check if this read is sequential (adjacent or nearly adjacent)
+	isSequential := offset >= state.lastOffset && 
+		(offset - state.lastOffset) <= prefetchThresholdBytes
+
+	if isSequential && !state.sequential {
+		// Just detected sequential pattern, start prefetching
+		state.sequential = true
+		pf.startPrefetch(hash, offset+length)
+	} else if isSequential && state.sequential && !state.prefetching {
+		// Continue prefetching for ongoing sequential reads
+		pf.startPrefetch(hash, offset+length)
+	} else if !isSequential {
+		// Pattern broken, mark as non-sequential
+		state.sequential = false
+	}
+
+	state.lastOffset = offset + length
+	state.lastAccessTime = now
+}
+
+// startPrefetch queues prefetch tasks for future chunks
+func (pf *Prefetcher) startPrefetch(hash string, startOffset int64) {
+	state := pf.states[hash]
+	if state.prefetching {
+		return
+	}
+
+	state.prefetching = true
+
+	go func() {
+		defer func() {
+			pf.mu.Lock()
+			if s, exists := pf.states[hash]; exists {
+				s.prefetching = false
+			}
+			pf.mu.Unlock()
+		}()
+
+		// Queue multiple chunks for prefetch
+		chunkSize := int64(4 * 1024 * 1024) // 4MB chunks
+		for i := int64(0); i < prefetchAheadChunks; i++ {
+			offset := startOffset + (i * chunkSize)
+			
+			select {
+			case pf.workQueue <- &prefetchTask{
+				hash:   hash,
+				offset: offset,
+				length: chunkSize,
+			}:
+			case <-pf.ctx.Done():
+				return
+			default:
+				// Queue full, stop prefetching
+				return
+			}
+		}
+	}()
+}
+
+// worker processes prefetch tasks
+func (pf *Prefetcher) worker() {
+	for {
+		select {
+		case task := <-pf.workQueue:
+			// Check if content is already in cache
+			if pf.cas.Exists(task.hash) {
+				// Try to warm up the chunk in memory if it's on disk
+				dst := pf.bufferPool.Get(int(task.length))
+				_, err := pf.cas.Get(task.hash, task.offset, task.length, dst)
+				pf.bufferPool.Put(dst)
+				
+				if err == nil {
+					Logger.Debugf("Prefetched chunk [%s] offset=%d length=%d", 
+						task.hash, task.offset, task.length)
+				}
+			}
+		case <-pf.ctx.Done():
+			return
+		}
+	}
+}
+
+// cleanupWorker removes stale prefetch states
+func (pf *Prefetcher) cleanupWorker() {
+	ticker := time.NewTicker(prefetchCacheTime)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			pf.cleanup()
+		case <-pf.ctx.Done():
+			return
+		}
+	}
+}
+
+func (pf *Prefetcher) cleanup() {
+	pf.mu.Lock()
+	defer pf.mu.Unlock()
+
+	now := time.Now()
+	for hash, state := range pf.states {
+		if now.Sub(state.lastAccessTime) > prefetchCacheTime {
+			delete(pf.states, hash)
+		}
+	}
+}
diff --git a/pkg/server.go b/pkg/server.go
index 951004e..9af60dc 100644
--- a/pkg/server.go
+++ b/pkg/server.go
@@ -186,11 +186,46 @@ func (cs *CacheService) StartServer(port uint) error {
 	}
 
 	maxMessageSize := cs.globalConfig.GRPCMessageSizeBytes
+	
+	initialWindowSize := cs.globalConfig.GRPCInitialWindowSize
+	if initialWindowSize == 0 {
+		initialWindowSize = 4 * 1024 * 1024
+	}
+	
+	initialConnWindowSize := cs.globalConfig.GRPCInitialConnWindowSize
+	if initialConnWindowSize == 0 {
+		initialConnWindowSize = 32 * 1024 * 1024
+	}
+	
+	writeBufferSize := cs.globalConfig.GRPCWriteBufferSize
+	if writeBufferSize == 0 {
+		writeBufferSize = 256 * 1024
+	}
+	
+	readBufferSize := cs.globalConfig.GRPCReadBufferSize
+	if readBufferSize == 0 {
+		readBufferSize = 256 * 1024
+	}
+	
+	maxConcurrentStreams := cs.globalConfig.GRPCMaxConcurrentStreams
+	if maxConcurrentStreams == 0 {
+		maxConcurrentStreams = 1024
+	}
+	
+	numStreamWorkers := cs.globalConfig.GRPCNumStreamWorkers
+	if numStreamWorkers == 0 {
+		numStreamWorkers = runtime.NumCPU() * 2
+	}
+
 	s := grpc.NewServer(
 		grpc.MaxRecvMsgSize(maxMessageSize),
 		grpc.MaxSendMsgSize(maxMessageSize),
-		grpc.WriteBufferSize(writeBufferSizeBytes),
-		grpc.NumStreamWorkers(uint32(runtime.NumCPU())),
+		grpc.InitialWindowSize(int32(initialWindowSize)),
+		grpc.InitialConnWindowSize(int32(initialConnWindowSize)),
+		grpc.WriteBufferSize(writeBufferSize),
+		grpc.ReadBufferSize(readBufferSize),
+		grpc.MaxConcurrentStreams(uint32(maxConcurrentStreams)),
+		grpc.NumStreamWorkers(uint32(numStreamWorkers)),
 	)
 	proto.RegisterBlobCacheServer(s, cs)
 
diff --git a/pkg/storage.go b/pkg/storage.go
index 6552d7a..89643eb 100644
--- a/pkg/storage.go
+++ b/pkg/storage.go
@@ -33,10 +33,12 @@ type ContentAddressableStorage struct {
 	diskCachedUsageExceeded bool
 	mu                      sync.Mutex
 	metrics                 BlobcacheMetrics
+	bufferPool              *BufferPool
+	prefetcher              *Prefetcher
 }
 
 func NewContentAddressableStorage(ctx context.Context, currentHost *BlobCacheHost, locality string, coordinator CoordinatorClient, config BlobCacheConfig) (*ContentAddressableStorage, error) {
-	if config.Server.MaxCachePct <= 0 || config.Server.PageSizeBytes <= 0 {
+	if config.Server.PageSizeBytes <= 0 {
 		return nil, errors.New("invalid cache configuration")
 	}
 
@@ -54,33 +56,61 @@ func NewContentAddressableStorage(ctx context.Context, currentHost *BlobCacheHos
 
 	Logger.Infof("Disk cache directory located at: '%s'", cas.diskCacheDir)
 
-	_, totalMemoryMb := getMemoryMb()
-	maxCacheSizeMb := (totalMemoryMb * cas.serverConfig.MaxCachePct) / 100
-	maxCost := maxCacheSizeMb * 1e6
+	// Memory cache enabled by default if MaxCachePct > 0 (backwards compatible)
+	// Set MaxCachePct to 0 for disk-only mode
+	enableMemCache := config.Server.MaxCachePct > 0
+	
+	if enableMemCache {
+		_, totalMemoryMb := getMemoryMb()
+		maxCacheSizeMb := (totalMemoryMb * cas.serverConfig.MaxCachePct) / 100
+		maxCost := maxCacheSizeMb * 1e6
+
+		Logger.Infof("Memory cache ENABLED")
+		Logger.Infof("Total available memory: %dMB", totalMemoryMb)
+		Logger.Infof("Max cache size: %dMB", maxCacheSizeMb)
+		Logger.Infof("Max cost: %d", maxCost)
+
+		if maxCacheSizeMb <= 0 {
+			return nil, errors.New("invalid memory limit")
+		}
 
-	Logger.Infof("Total available memory: %dMB", totalMemoryMb)
-	Logger.Infof("Max cache size: %dMB", maxCacheSizeMb)
-	Logger.Infof("Max cost: %d", maxCost)
+		cache, err := ristretto.NewCache(&ristretto.Config[string, interface{}]{
+			NumCounters: 1e7,
+			MaxCost:     maxCost,
+			BufferItems: 64,
+			OnEvict:     cas.onEvict,
+			Metrics:     cas.globalConfig.DebugMode,
+		})
+		if err != nil {
+			return nil, err
+		}
 
-	if maxCacheSizeMb <= 0 {
-		return nil, errors.New("invalid memory limit")
+		cas.cache = cache
+		cas.maxCacheSizeMb = maxCacheSizeMb
+	} else {
+		Logger.Infof("Memory cache DISABLED (disk-only mode)")
+		// Create a minimal cache just for metadata tracking
+		cache, _ := ristretto.NewCache(&ristretto.Config[string, interface{}]{
+			NumCounters: 1e5,
+			MaxCost:     1024 * 1024, // 1MB for metadata only
+			BufferItems: 64,
+			OnEvict:     cas.onEvict,
+			Metrics:     false,
+		})
+		cas.cache = cache
 	}
 
-	cache, err := ristretto.NewCache(&ristretto.Config[string, interface{}]{
-		NumCounters: 1e7,
-		MaxCost:     maxCost,
-		BufferItems: 64,
-		OnEvict:     cas.onEvict,
-		Metrics:     cas.globalConfig.DebugMode,
-	})
-	if err != nil {
-		return nil, err
+	// Only start disk monitor if we have a metrics URL (not in benchmarks/tests)
+	if config.Metrics.URL != "" {
+		go cas.monitorDiskCacheUsage()
 	}
-
-	go cas.monitorDiskCacheUsage()
-
-	cas.cache = cache
-	cas.maxCacheSizeMb = maxCacheSizeMb
+	
+	// Initialize buffer pool for reduced allocations
+	cas.bufferPool = NewBufferPool()
+	
+	// Initialize prefetcher for sequential read optimization
+	cas.prefetcher = NewPrefetcher(ctx, cas, cas.bufferPool)
+	
 	return cas, nil
 }
 
@@ -174,6 +204,21 @@ func (cas *ContentAddressableStorage) Get(hash string, offset, length int64, dst
 	remainingLength := length
 	o := offset
 	dstOffset := int64(0)
+	
+	// Track metrics
+	cas.metrics.TotalReads.Inc()
+	start := time.Now()
+	defer func() {
+		if dstOffset > 0 {
+			throughputMBps := (float64(dstOffset) / (1024 * 1024)) / (float64(time.Since(start).Microseconds()) / 1e6)
+			cas.metrics.ReadThroughputMBps.Update(throughputMBps)
+		}
+		// Update hit ratios
+		cas.updateHitRatios()
+	}()
+	
+	// Notify prefetcher about this read
+	cas.prefetcher.OnRead(hash, offset, length)
 
 	cas.cache.ResetTTL(hash, time.Duration(cas.serverConfig.ObjectTtlS)*time.Second)
 
@@ -184,16 +229,22 @@ func (cas *ContentAddressableStorage) Get(hash string, offset, length int64, dst
 		var value interface{}
 		var found bool = false
 
-		// Check in-memory cache for chunk
+		// Check in-memory cache for chunk (L0)
 		value, found = cas.cache.Get(chunkKey)
+		fromMemory := found
+		if found {
+			cas.metrics.L0Hits.Inc()
+		}
 
-		// Not found in memory, check disk cache before giving up
+		// Not found in memory, check disk cache (L1) before giving up
 		if !found {
 			var err error
 			value, found, err = cas.getFromDiskCache(hash, chunkKey)
 			if err != nil || !found {
+				cas.metrics.L2Misses.Inc()
 				return 0, ErrContentNotFound
 			}
+			cas.metrics.L1Hits.Inc()
 		}
 
 		v, ok := value.(cacheValue)
@@ -216,6 +267,13 @@ func (cas *ContentAddressableStorage) Get(hash string, offset, length int64, dst
 		}
 
 		copy(dst[dstOffset:dstOffset+readLength], chunkBytes[start:end])
+		
+		// Track bytes served from appropriate tier
+		if fromMemory {
+			cas.metrics.L0BytesServed.Add(int(readLength))
+		} else {
+			cas.metrics.L1BytesServed.Add(int(readLength))
+		}
 
 		remainingLength -= readLength
 		o += readLength
@@ -235,10 +293,28 @@ func (cas *ContentAddressableStorage) getFromDiskCache(hash, chunkKey string) (v
 	}
 
 	chunkPath := filepath.Join(cas.diskCacheDir, hash, chunkKey)
+	
+	// Use fadvise to hint sequential/random access patterns
+	file, err := os.Open(chunkPath)
+	if err != nil {
+		return cacheValue{}, false, ErrContentNotFound
+	}
+	defer file.Close()
+	
+	// Hint sequential access for better readahead
+	if err := fadviseSequential(file.Fd()); err == nil {
+		Logger.Debugf("Set FADV_SEQUENTIAL for %s", chunkPath)
+	}
+	
 	chunkBytes, err := os.ReadFile(chunkPath)
 	if err != nil {
 		return cacheValue{}, false, ErrContentNotFound
 	}
+	
+	// Hint willneed for prefetch
+	if err := fadviseWillneed(file.Fd(), 0, int64(len(chunkBytes))); err == nil {
+		Logger.Debugf("Set FADV_WILLNEED for %s", chunkPath)
+	}
 
 	value = cacheValue{Hash: hash, Content: chunkBytes}
 	cas.cache.Set(chunkKey, value, int64(len(chunkBytes)))
@@ -360,7 +436,10 @@ func (cas *ContentAddressableStorage) monitorDiskCacheUsage() {
 		case <-ticker.C:
 			currentUsage, totalDiskSpace, usagePercentage, err := cas.GetDiskCacheMetrics()
 			if err != nil {
-				Logger.Errorf("Failed to fetch disk cache metrics: %v", err)
+				// Silently skip if directory doesn't exist (common in tests/benchmarks)
+				if !os.IsNotExist(err) {
+					Logger.Errorf("Failed to fetch disk cache metrics: %v", err)
+				}
 				continue
 			}
 
@@ -381,3 +460,21 @@ func (cas *ContentAddressableStorage) monitorDiskCacheUsage() {
 		}
 	}
 }
+
+// updateHitRatios calculates and updates cache hit ratio metrics
+func (cas *ContentAddressableStorage) updateHitRatios() {
+	l0Hits := cas.metrics.L0Hits.Get()
+	l1Hits := cas.metrics.L1Hits.Get()
+	l2Misses := cas.metrics.L2Misses.Get()
+	total := l0Hits + l1Hits + l2Misses
+	
+	if total > 0 {
+		l0Ratio := float64(l0Hits) / float64(total) * 100
+		l1Ratio := float64(l1Hits) / float64(total) * 100
+		l2Ratio := float64(l2Misses) / float64(total) * 100
+		
+		cas.metrics.L0HitRatio.Update(l0Ratio)
+		cas.metrics.L1HitRatio.Update(l1Ratio)
+		cas.metrics.L2MissRatio.Update(l2Ratio)
+	}
+}
diff --git a/pkg/storage_bench_test.go b/pkg/storage_bench_test.go
new file mode 100644
index 0000000..0ab2985
--- /dev/null
+++ b/pkg/storage_bench_test.go
@@ -0,0 +1,304 @@
+package blobcache
+
+import (
+	"context"
+	"crypto/rand"
+	"fmt"
+	"os"
+	"sync"
+	"testing"
+	"time"
+)
+
+var benchmarkSetupOnce sync.Once
+
+func setupBenchmarkCAS(b *testing.B) (*ContentAddressableStorage, func()) {
+	ctx, cancel := context.WithCancel(context.Background())
+	tmpDir := b.TempDir()
+	
+	benchmarkSetupOnce.Do(func() {
+		InitLogger(false, false)
+	})
+	
+	config := BlobCacheConfig{
+		Server: BlobCacheServerConfig{
+			DiskCacheDir:         tmpDir,
+			DiskCacheMaxUsagePct: 90,
+			MaxCachePct:          50,
+			PageSizeBytes:        4 * 1024 * 1024,
+			ObjectTtlS:           300,
+		},
+		Global: BlobCacheGlobalConfig{
+			DebugMode: false,
+		},
+		Metrics: BlobCacheMetricsConfig{
+			PushIntervalS: 60,
+			URL:           "",
+		},
+	}
+	
+	currentHost := &BlobCacheHost{
+		HostId:   "bench-host",
+		Addr:     "localhost:50051",
+		RTT:      0,
+	}
+	
+	mockCoordinator := NewMockCoordinator()
+	cas, err := NewContentAddressableStorage(ctx, currentHost, "local", mockCoordinator, config)
+	if err != nil {
+		b.Fatalf("Failed to create CAS: %v", err)
+	}
+	
+	cleanup := func() {
+		cancel()
+		cas.Cleanup()
+		os.RemoveAll(tmpDir)
+	}
+	
+	return cas, cleanup
+}
+
+func BenchmarkSequentialRead(b *testing.B) {
+	sizes := []int64{
+		1 * 1024 * 1024,
+		4 * 1024 * 1024,
+	}
+	
+	for _, size := range sizes {
+		b.Run(fmt.Sprintf("size_%dMB", size/(1024*1024)), func(b *testing.B) {
+			cas, cleanup := setupBenchmarkCAS(b)
+			defer cleanup()
+			
+			// Generate test data
+			content := make([]byte, size)
+			rand.Read(content)
+			hash := fmt.Sprintf("test-hash-%d", size)
+			
+			err := cas.Add(context.Background(), hash, content)
+			if err != nil {
+				b.Fatalf("Failed to add content: %v", err)
+			}
+			
+			dst := make([]byte, 1024*1024) // 1MB read buffer
+			
+			b.ResetTimer()
+			b.SetBytes(size)
+			
+			for i := 0; i < b.N; i++ {
+				offset := int64(0)
+				for offset < size {
+					readLen := int64(len(dst))
+					if offset+readLen > size {
+						readLen = size - offset
+					}
+					
+					n, err := cas.Get(hash, offset, readLen, dst)
+					if err != nil {
+						b.Fatalf("Failed to read: %v", err)
+					}
+					offset += n
+				}
+			}
+		})
+	}
+}
+
+func BenchmarkRandomRead(b *testing.B) {
+	chunkSizes := []int64{
+		64 * 1024,
+		512 * 1024,
+	}
+	
+	fileSize := int64(16 * 1024 * 1024)
+	
+	for _, chunkSize := range chunkSizes {
+		b.Run(fmt.Sprintf("chunk_%dKB", chunkSize/1024), func(b *testing.B) {
+			cas, cleanup := setupBenchmarkCAS(b)
+			defer cleanup()
+			
+			// Generate test data
+			content := make([]byte, fileSize)
+			rand.Read(content)
+			hash := "random-test-hash"
+			
+			err := cas.Add(context.Background(), hash, content)
+			if err != nil {
+				b.Fatalf("Failed to add content: %v", err)
+			}
+			
+			dst := make([]byte, chunkSize)
+			
+			b.ResetTimer()
+			b.SetBytes(chunkSize)
+			
+			for i := 0; i < b.N; i++ {
+				// Random offset
+				offset := int64(i*13) % (fileSize - chunkSize)
+				
+				_, err := cas.Get(hash, offset, chunkSize, dst)
+				if err != nil {
+					b.Fatalf("Failed to read: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func BenchmarkSmallFiles(b *testing.B) {
+	fileSizes := []int{10 * 1024, 100 * 1024}
+	
+	for _, fileSize := range fileSizes {
+		b.Run(fmt.Sprintf("size_%dKB", fileSize/1024), func(b *testing.B) {
+			cas, cleanup := setupBenchmarkCAS(b)
+			defer cleanup()
+			
+			numFiles := 100
+			hashes := make([]string, numFiles)
+			
+			for i := 0; i < numFiles; i++ {
+				content := make([]byte, fileSize)
+				rand.Read(content)
+				hash := fmt.Sprintf("small-file-%d", i)
+				hashes[i] = hash
+				
+				err := cas.Add(context.Background(), hash, content)
+				if err != nil {
+					b.Fatalf("Failed to add content: %v", err)
+				}
+			}
+			
+			dst := make([]byte, fileSize)
+			
+			b.ResetTimer()
+			b.SetBytes(int64(fileSize))
+			
+			for i := 0; i < b.N; i++ {
+				hash := hashes[i%numFiles]
+				_, err := cas.Get(hash, 0, int64(fileSize), dst)
+				if err != nil {
+					b.Fatalf("Failed to read: %v", err)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkCacheHitRatios measures L0/L1 cache performance
+func BenchmarkCacheHitRatios(b *testing.B) {
+	cas, cleanup := setupBenchmarkCAS(b)
+	defer cleanup()
+	
+	fileSize := int64(16 * 1024 * 1024) // 16 MB
+	content := make([]byte, fileSize)
+	rand.Read(content)
+	hash := "cache-test-hash"
+	
+	err := cas.Add(context.Background(), hash, content)
+	if err != nil {
+		b.Fatalf("Failed to add content: %v", err)
+	}
+	
+	dst := make([]byte, 1024*1024) // 1MB chunks
+	
+	b.Run("L0_Hot_Cache", func(b *testing.B) {
+		// Warm up cache
+		cas.Get(hash, 0, int64(len(dst)), dst)
+		
+		b.ResetTimer()
+		b.SetBytes(int64(len(dst)))
+		
+		for i := 0; i < b.N; i++ {
+			_, err := cas.Get(hash, 0, int64(len(dst)), dst)
+			if err != nil {
+				b.Fatalf("Failed to read: %v", err)
+			}
+		}
+	})
+	
+	b.Run("L1_Disk_Cache", func(b *testing.B) {
+		// Clear in-memory cache but keep disk cache
+		// Note: This is a simplified test, actual implementation may vary
+		
+		b.ResetTimer()
+		b.SetBytes(int64(len(dst)))
+		
+		for i := 0; i < b.N; i++ {
+			offset := int64((i % 4) * len(dst)) // Rotate through chunks
+			_, err := cas.Get(hash, offset, int64(len(dst)), dst)
+			if err != nil {
+				b.Fatalf("Failed to read: %v", err)
+			}
+		}
+	})
+}
+
+// BenchmarkPrefetcher tests prefetcher effectiveness
+func BenchmarkPrefetcher(b *testing.B) {
+	cas, cleanup := setupBenchmarkCAS(b)
+	defer cleanup()
+	
+	fileSize := int64(64 * 1024 * 1024) // 64 MB
+	content := make([]byte, fileSize)
+	rand.Read(content)
+	hash := "prefetch-test-hash"
+	
+	err := cas.Add(context.Background(), hash, content)
+	if err != nil {
+		b.Fatalf("Failed to add content: %v", err)
+	}
+	
+	chunkSize := int64(4 * 1024 * 1024) // 4MB chunks
+	dst := make([]byte, chunkSize)
+	
+	b.ResetTimer()
+	b.SetBytes(fileSize)
+	
+	for i := 0; i < b.N; i++ {
+		// Sequential reads to trigger prefetcher
+		for offset := int64(0); offset < fileSize; offset += chunkSize {
+			readLen := chunkSize
+			if offset+readLen > fileSize {
+				readLen = fileSize - offset
+			}
+			
+			_, err := cas.Get(hash, offset, readLen, dst)
+			if err != nil {
+				b.Fatalf("Failed to read: %v", err)
+			}
+		}
+		
+		// Small delay to let prefetcher work
+		time.Sleep(10 * time.Millisecond)
+	}
+}
+
+// BenchmarkBufferPool tests buffer allocation performance
+func BenchmarkBufferPool(b *testing.B) {
+	pool := NewBufferPool()
+	
+	sizes := []int{
+		1 * 1024 * 1024,  // 1MB
+		4 * 1024 * 1024,  // 4MB
+		16 * 1024 * 1024, // 16MB
+	}
+	
+	for _, size := range sizes {
+		b.Run(fmt.Sprintf("size_%dMB", size/(1024*1024)), func(b *testing.B) {
+			b.Run("WithPool", func(b *testing.B) {
+				b.ReportAllocs()
+				for i := 0; i < b.N; i++ {
+					buf := pool.Get(size)
+					pool.Put(buf)
+				}
+			})
+			
+			b.Run("WithoutPool", func(b *testing.B) {
+				b.ReportAllocs()
+				for i := 0; i < b.N; i++ {
+					buf := make([]byte, size)
+					_ = buf
+				}
+			})
+		})
+	}
+}
diff --git a/pkg/types.go b/pkg/types.go
index 009f829..631ca35 100644
--- a/pkg/types.go
+++ b/pkg/types.go
@@ -41,6 +41,12 @@ type BlobCacheGlobalConfig struct {
 	HostStorageCapacityThresholdPct float64 `key:"hostStorageCapacityThresholdPct" json:"host_storage_capacity_threshold_pct"`
 	GRPCDialTimeoutS                int     `key:"grpcDialTimeoutS" json:"grpc_dial_timeout_s"`
 	GRPCMessageSizeBytes            int     `key:"grpcMessageSizeBytes" json:"grpc_message_size_bytes"`
+	GRPCInitialWindowSize           int     `key:"grpcInitialWindowSize" json:"grpc_initial_window_size"`
+	GRPCInitialConnWindowSize       int     `key:"grpcInitialConnWindowSize" json:"grpc_initial_conn_window_size"`
+	GRPCWriteBufferSize             int     `key:"grpcWriteBufferSize" json:"grpc_write_buffer_size"`
+	GRPCReadBufferSize              int     `key:"grpcReadBufferSize" json:"grpc_read_buffer_size"`
+	GRPCMaxConcurrentStreams        int     `key:"grpcMaxConcurrentStreams" json:"grpc_max_concurrent_streams"`
+	GRPCNumStreamWorkers            int     `key:"grpcNumStreamWorkers" json:"grpc_num_stream_workers"`
 	DebugMode                       bool    `key:"debugMode" json:"debug_mode"`
 	PrettyLogs                      bool    `key:"prettyLogs" json:"pretty_logs"`
 }
@@ -65,6 +71,7 @@ type BlobCacheServerConfig struct {
 	Mode                  BlobCacheServerMode `key:"mode" json:"mode"`
 	DiskCacheDir          string              `key:"diskCacheDir" json:"disk_cache_dir"`
 	DiskCacheMaxUsagePct  float64             `key:"diskCacheMaxUsagePct" json:"disk_cache_max_usage_pct"`
+	EnableMemoryCache     bool                `key:"enableMemoryCache" json:"enable_memory_cache"`
 	Token                 string              `key:"token" json:"token"`
 	PrettyLogs            bool                `key:"prettyLogs" json:"pretty_logs"`
 	ObjectTtlS            int                 `key:"objectTtlS" json:"object_ttl_s"`