diff --git a/docs/examples/conftest.py b/docs/examples/conftest.py index 843d0fe0..b7e1faa5 100644 --- a/docs/examples/conftest.py +++ b/docs/examples/conftest.py @@ -108,6 +108,10 @@ def _should_skip_collection(markers): if not markers: return False, None + # Check for explicit skip marker first + if "skip" in markers: + return True, "Example marked with skip marker" + try: capabilities = get_system_capabilities() except Exception: @@ -175,6 +179,147 @@ def _check_optional_imports(file_path): return False, None +def pytest_addoption(parser): + """Add command-line options for skipping capability checks. + + These match the options in test/conftest.py to provide consistent behavior. + Only adds options if they don't already exist (to avoid conflicts when both + test/ and docs/ conftest files are loaded). + """ + + # Helper to safely add option only if it doesn't exist + def add_option_safe(option_name, **kwargs): + try: + parser.addoption(option_name, **kwargs) + except ValueError: + # Option already exists (likely from test/conftest.py) + pass + + add_option_safe( + "--ignore-gpu-check", + action="store_true", + default=False, + help="Ignore GPU requirement checks (examples may fail without GPU)", + ) + add_option_safe( + "--ignore-ram-check", + action="store_true", + default=False, + help="Ignore RAM requirement checks (examples may fail with insufficient RAM)", + ) + add_option_safe( + "--ignore-ollama-check", + action="store_true", + default=False, + help="Ignore Ollama availability checks (examples will fail if Ollama not running)", + ) + add_option_safe( + "--ignore-api-key-check", + action="store_true", + default=False, + help="Ignore API key checks (examples will fail without valid API keys)", + ) + add_option_safe( + "--ignore-all-checks", + action="store_true", + default=False, + help="Ignore all requirement checks (GPU, RAM, Ollama, API keys)", + ) + + +def _collect_vllm_example_files(session) -> list[str]: + """Collect all example files that have vLLM marker. + + Returns list of file paths. + """ + vllm_files = set() + + for item in session.items: + # Check if this is an ExampleItem with vllm marker + if hasattr(item, "path"): + file_path = str(item.path) + # Check if file has vllm marker + if file_path.endswith(".py"): + markers = _extract_markers_from_file(file_path) + if "vllm" in markers: + vllm_files.add(file_path) + + return sorted(vllm_files) + + +def _run_vllm_examples_isolated(session, vllm_files: list[str]) -> int: + """Run vLLM example files in separate processes for GPU memory isolation. + + Returns exit code (0 = all passed, 1 = any failed). 
+ """ + print("\n" + "=" * 70) + print("vLLM Process Isolation Active (Examples)") + print("=" * 70) + print(f"Running {len(vllm_files)} vLLM example(s) in separate processes") + print("to ensure GPU memory is fully released between examples.\n") + + # Set environment variables for vLLM + env = os.environ.copy() + env["VLLM_USE_V1"] = "0" + env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + + all_passed = True + + for i, file_path in enumerate(vllm_files, 1): + print(f"\n[{i}/{len(vllm_files)}] Running: {file_path}") + print("-" * 70) + + # Run example directly with Python + cmd = [sys.executable, file_path] + + result = subprocess.run(cmd, env=env) + + if result.returncode != 0: + all_passed = False + print(f"✗ Example failed: {file_path}") + else: + print(f"✓ Example passed: {file_path}") + + print("\n" + "=" * 70) + if all_passed: + print("All vLLM examples passed!") + else: + print("Some vLLM examples failed.") + print("=" * 70 + "\n") + + return 0 if all_passed else 1 + + +def pytest_collection_finish(session): + """After collection, check if we need vLLM process isolation for examples. + + If vLLM examples are collected and there are multiple files, + run them in separate processes and exit. + """ + # Only check for examples in docs/examples + if not any( + "docs" in str(item.path) and "examples" in str(item.path) + for item in session.items + ): + return + + # Collect vLLM example files + vllm_files = _collect_vllm_example_files(session) + + # Only use process isolation if multiple vLLM examples + if len(vllm_files) <= 1: + return + + # Run examples in isolation + exit_code = _run_vllm_examples_isolated(session, vllm_files) + + # Clear collected items so pytest doesn't run them again + session.items.clear() + + # Exit with appropriate code + pytest.exit("vLLM examples completed in isolated processes", returncode=exit_code) + + def pytest_terminal_summary(terminalreporter, exitstatus, config): # Append the skipped examples if needed. if len(examples_to_skip) == 0: @@ -188,21 +333,73 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): ) -def pytest_ignore_collect(collection_path, path, config): +def pytest_pycollect_makemodule(module_path, parent): + """Intercepts Module creation to skip files before import. + + Runs for both directory traversal and direct file specification. + Returning a SkippedFile prevents pytest from importing the file, + which is necessary when files contain unavailable dependencies. + + Args: + module_path: pathlib.Path to the module + parent: Parent collector node + """ + file_path = module_path + + # Limit scope to docs/examples directory + if "docs" not in str(file_path) or "examples" not in str(file_path): + return None + + if file_path.name == "conftest.py": + return None + + # Initialize capabilities cache if needed + config = parent.config + if not hasattr(config, "_example_capabilities"): + config._example_capabilities = get_system_capabilities() + + # Check manual skip list + if file_path.name in examples_to_skip: + return SkippedFile.from_parent(parent, path=file_path) + + # Extract and evaluate markers + markers = _extract_markers_from_file(file_path) + + if not markers: + return None + + should_skip, _reason = _should_skip_collection(markers) + + if should_skip: + # Prevent import by returning custom collector + return SkippedFile.from_parent(parent, path=file_path) + + return None + + +def pytest_ignore_collect(collection_path, config): """Ignore files before pytest even tries to parse them. 
This is called BEFORE pytest_collect_file, so we can prevent heavy files from being parsed at all. + + NOTE: This hook is only called during directory traversal, not for + directly specified files. The pytest_pycollect_makemodule hook handles + both cases. """ # Skip conftest.py itself - it's not a test if collection_path.name == "conftest.py": return True + # Convert to absolute path to check if it's in docs/examples + # (pytest may pass relative paths) + abs_path = collection_path.resolve() + # Only check Python files in docs/examples if ( collection_path.suffix == ".py" - and "docs" in collection_path.parts - and "examples" in collection_path.parts + and "docs" in abs_path.parts + and "examples" in abs_path.parts ): # Skip files in the manual skip list if collection_path.name in examples_to_skip: @@ -215,34 +412,16 @@ def pytest_ignore_collect(collection_path, path, config): if should_skip: # Return True to ignore this file completely return True - except Exception: - # If anything goes wrong, don't skip - pass - - return False - - -def pytest_pycollect_makemodule(module_path, path, parent): - """Prevent pytest from importing Python modules as test modules. + except Exception as e: + # Log the error but don't skip - let pytest handle it + import sys - This hook is called BEFORE pytest tries to import the module, - so we can prevent import errors from optional dependencies. - """ - # Only handle files in docs/examples - if ( - module_path.suffix == ".py" - and "docs" in module_path.parts - and "examples" in module_path.parts - ): - # Check for optional imports - should_skip, _reason = _check_optional_imports(module_path) - if should_skip: - # Add to skip list and return None to prevent module creation - examples_to_skip.add(module_path.name) - return None + print( + f"WARNING: Error checking markers for {collection_path}: {e}", + file=sys.stderr, + ) - # Return None to let pytest handle it normally - return None + return False # This doesn't replace the existing pytest file collection behavior. @@ -258,14 +437,49 @@ def pytest_collect_file(parent: pytest.Dir, file_path: pathlib.PosixPath): if file_path.name in examples_to_skip: return + # Check markers first - if file has skip marker, return SkippedFile + try: + markers = _extract_markers_from_file(file_path) + should_skip, _reason = _should_skip_collection(markers) + if should_skip: + # FIX: Return a dummy collector instead of None. + # This prevents pytest from falling back to the default Module collector + # which would try to import the file. + return SkippedFile.from_parent(parent, path=file_path) + except Exception: + # If we can't read markers, continue with other checks + pass + # Check for optional imports before creating ExampleFile should_skip, _reason = _check_optional_imports(file_path) if should_skip: - return None + # FIX: Return SkippedFile instead of None for optional import skips too + return SkippedFile.from_parent(parent, path=file_path) return ExampleFile.from_parent(parent, path=file_path) +class SkippedFile(pytest.File): + """A dummy collector for skipped files to prevent default import. + + This collector is returned by pytest_pycollect_makemodule and pytest_collect_file + when a file should be skipped based on markers or system capabilities. + + By returning this custom collector instead of None, we prevent pytest from + falling back to its default Module collector which would import the file. + The collect() method returns an empty list, so no tests are collected. 
+ """ + + def __init__(self, **kwargs): + # Extract reason if provided, otherwise use default + self.skip_reason = kwargs.pop("reason", "File skipped based on markers") + super().__init__(**kwargs) + + def collect(self): + # Return empty list - no tests to collect from this file + return [] + + class ExampleFile(pytest.File): def collect(self): return [ExampleItem.from_parent(self, name=self.name)] @@ -339,17 +553,27 @@ def pytest_runtest_setup(item): if not isinstance(item, ExampleItem): return + # Check for explicit skip marker first + if item.get_closest_marker("skip"): + pytest.skip("Example marked with skip marker") + # Get system capabilities capabilities = get_system_capabilities() # Get gh_run status (CI environment) gh_run = int(os.environ.get("CICD", 0)) - # Get config options (all default to False for examples) - ignore_gpu = False - ignore_ram = False - ignore_ollama = False - ignore_api_key = False + # Get config options from CLI (matching test/conftest.py behavior) + config = item.config + ignore_all = config.getoption("--ignore-all-checks", default=False) + ignore_gpu = config.getoption("--ignore-gpu-check", default=False) or ignore_all + ignore_ram = config.getoption("--ignore-ram-check", default=False) or ignore_all + ignore_ollama = ( + config.getoption("--ignore-ollama-check", default=False) or ignore_all + ) + ignore_api_key = ( + config.getoption("--ignore-api-key-check", default=False) or ignore_all + ) # Skip qualitative tests in CI if item.get_closest_marker("qualitative") and gh_run == 1: diff --git a/mellea/backends/vllm.py b/mellea/backends/vllm.py index 0f854f69..a01900dd 100644 --- a/mellea/backends/vllm.py +++ b/mellea/backends/vllm.py @@ -70,6 +70,9 @@ class LocalVLLMBackend(FormatterBackend): Its throughput is generally higher than that of LocalHFBackend. However, it takes longer to load the weights during the instantiation. Also, if you submit a request one by one, it can be slower. + + Note: vLLM defaults to ~16 tokens. Always set ModelOption.MAX_NEW_TOKENS explicitly (100-1000+). + Structured output needs 200-500+ tokens. 
""" def __init__( diff --git a/pyproject.toml b/pyproject.toml index 0568389b..5bac082b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -273,6 +273,11 @@ filterwarnings = [ # Deduplicate noisy Pydantic serialization warnings "once:.*PydanticSerializationUnexpectedValue.*:UserWarning", + # Ignore SWIG C/C++ binding warnings from vLLM (Python 3.12 issue) + "ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning", + "ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning", + "ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning", + # Keep Watsonx deprecation visible (important for migration) "default:.*Watsonx Backend is deprecated.*:DeprecationWarning", ] diff --git a/test/README.md b/test/README.md index a05d67a0..b44f4724 100644 --- a/test/README.md +++ b/test/README.md @@ -13,9 +13,65 @@ uv run pytest # All tests including slow tests (>5 min) uv run pytest -m slow -uv run pytest # without pytest.ini config ``` +## Environment Variables + +- `CICD=1` - Enable CI mode (skips qualitative tests, enables aggressive memory cleanup) +- `VLLM_USE_V1=0` - Required for vLLM tests (automatically set by process isolation) +- `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True` - Helps with GPU memory fragmentation + +## Heavy GPU Tests - Automatic Process Isolation + +**Heavy GPU tests (HuggingFace, vLLM) automatically use process isolation when multiple test modules are detected.** + +### Why Process Isolation? + +Heavy GPU backends (HuggingFace, vLLM) hold GPU memory at the process level. Even with aggressive cleanup (garbage collection, CUDA cache clearing, etc.), GPU memory remains locked by the CUDA driver until the process exits. When running multiple heavy GPU test modules in sequence, this causes OOM errors. + +### How It Works + +The collection hook in `test/conftest.py` detects multiple modules with `requires_heavy_ram` marker and automatically: + +1. Runs each module in a separate subprocess +2. Sets required environment variables (`VLLM_USE_V1=0`, `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True`) +3. Ensures full GPU memory release between modules +4. Reports results from all modules + +### Usage + +```bash +# Run all heavy GPU tests with automatic isolation +uv run pytest -m requires_heavy_ram + +# Run vLLM tests specifically +uv run pytest -m vllm + +# Run HuggingFace tests specifically +uv run pytest -m huggingface + +# Single module runs normally (no isolation needed) +uv run pytest test/backends/test_vllm.py + +# Works with other pytest options +uv run pytest -m "requires_heavy_ram and not qualitative" +``` + +### Affected Tests + +Tests marked with `@pytest.mark.requires_heavy_ram`: +- `test/backends/test_huggingface.py` - HuggingFace backend tests +- `test/backends/test_huggingface_tools.py` - HuggingFace tool calling +- `test/backends/test_vllm.py` - vLLM backend tests +- `test/backends/test_vllm_tools.py` - vLLM tool calling + +### Technical Details + +- **Single module**: Runs normally in the main pytest process +- **Multiple modules**: Each runs in its own subprocess with full GPU memory isolation +- **No external server needed**: Tests instantiate `LocalVLLMBackend` directly +- **Automatic detection**: Based on `@pytest.mark.vllm` marker + ## GPU Testing on CUDA Systems ### The Problem: CUDA EXCLUSIVE_PROCESS Mode @@ -86,6 +142,7 @@ However, this creates the "Parent Trap": the parent pytest process holds a CUDA See [`MARKERS_GUIDE.md`](MARKERS_GUIDE.md) for complete marker documentation. 
Key markers for GPU testing: +- `@pytest.mark.vllm` - Requires vLLM backend (local, GPU required, auto-isolated) - `@pytest.mark.huggingface` - Requires HuggingFace backend (local, GPU-heavy) - `@pytest.mark.requires_gpu` - Requires GPU hardware - `@pytest.mark.requires_heavy_ram` - Requires 48GB+ RAM diff --git a/test/backends/test_vllm.py b/test/backends/test_vllm.py index 99bbef2b..23e0feac 100644 --- a/test/backends/test_vllm.py +++ b/test/backends/test_vllm.py @@ -50,7 +50,12 @@ def backend(): "max_num_seqs": 8, }, ) - return backend + yield backend + + # Cleanup using shared function (best-effort within module) + from test.conftest import cleanup_vllm_backend + + cleanup_vllm_backend(backend) @pytest.fixture(scope="function") @@ -141,6 +146,7 @@ class Answer(pydantic.BaseModel): actions=[CBlock(value=prompt) for prompt in prompts], ctx=session.ctx, format=Answer, + model_options={ModelOption.MAX_NEW_TOKENS: 100}, ) assert len(results) == len(prompts) diff --git a/test/backends/test_vllm_tools.py b/test/backends/test_vllm_tools.py index a59ad7e8..4f02bb1e 100644 --- a/test/backends/test_vllm_tools.py +++ b/test/backends/test_vllm_tools.py @@ -46,7 +46,12 @@ def backend(): "max_num_seqs": 8, }, ) - return backend + yield backend + + # Cleanup using shared function (best-effort within module) + from test.conftest import cleanup_vllm_backend + + cleanup_vllm_backend(backend) @pytest.fixture(scope="function") diff --git a/test/conftest.py b/test/conftest.py index 9773f73d..98a5b745 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -110,32 +110,45 @@ def gh_run() -> int: def pytest_addoption(parser): - """Add custom command-line options.""" - parser.addoption( + """Add custom command-line options. + + Uses safe registration to avoid conflicts when both test/ and docs/ + conftest files are loaded. + """ + + # Helper to safely add option only if it doesn't exist + def add_option_safe(option_name, **kwargs): + try: + parser.addoption(option_name, **kwargs) + except ValueError: + # Option already exists (likely from docs/examples/conftest.py) + pass + + add_option_safe( "--ignore-gpu-check", action="store_true", default=False, help="Ignore GPU requirement checks (tests may fail without GPU)", ) - parser.addoption( + add_option_safe( "--ignore-ram-check", action="store_true", default=False, help="Ignore RAM requirement checks (tests may fail with insufficient RAM)", ) - parser.addoption( + add_option_safe( "--ignore-ollama-check", action="store_true", default=False, help="Ignore Ollama availability checks (tests will fail if Ollama not running)", ) - parser.addoption( + add_option_safe( "--ignore-api-key-check", action="store_true", default=False, help="Ignore API key checks (tests will fail without valid API keys)", ) - parser.addoption( + add_option_safe( "--ignore-all-checks", action="store_true", default=False, @@ -176,9 +189,248 @@ def pytest_configure(config): "markers", "llm: Tests that make LLM calls (needs at least Ollama)" ) + # Store vLLM isolation flag in config + config._vllm_process_isolation = False + # ============================================================================ -# Test Skipping Logic +# Heavy GPU Test Process Isolation +# ============================================================================ + + +def _collect_heavy_ram_modules(session) -> list[str]: + """Collect all test modules that have heavy RAM tests (HuggingFace, vLLM, etc.). + + Returns list of module paths (e.g., 'test/backends/test_vllm.py'). 
+ """ + heavy_modules = set() + + for item in session.items: + # Check if test has requires_heavy_ram marker (covers HF, vLLM, etc.) + if item.get_closest_marker("requires_heavy_ram"): + # Get the module path + module_path = str(item.path) + heavy_modules.add(module_path) + + return sorted(heavy_modules) + + +def _run_heavy_modules_isolated(session, heavy_modules: list[str]) -> int: + """Run heavy RAM test modules in separate processes for GPU memory isolation. + + Streams output in real-time and parses for test failures to provide + a clear summary at the end. + + Returns exit code (0 = all passed, 1 = any failed). + """ + print("\n" + "=" * 70) + print("Heavy GPU Test Process Isolation Active") + print("=" * 70) + print( + f"Running {len(heavy_modules)} heavy GPU test module(s) in separate processes" + ) + print("to ensure GPU memory is fully released between modules.\n") + + # Set environment variables for vLLM + env = os.environ.copy() + env["VLLM_USE_V1"] = "0" + env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + + all_passed = True + failed_modules = {} # module_path -> list of failed test names + + for i, module_path in enumerate(heavy_modules, 1): + print(f"\n[{i}/{len(heavy_modules)}] Running: {module_path}") + print("-" * 70) + + # Build pytest command with same options as parent session + cmd = [sys.executable, "-m", "pytest", module_path, "-v"] + + # Add markers from original command if present + config = session.config + markexpr = config.getoption("-m", default=None) + if markexpr: + cmd.extend(["-m", markexpr]) + + # Stream output in real-time while capturing for parsing + process = subprocess.Popen( + cmd, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Merge stderr into stdout + text=True, + bufsize=1, # Line buffered for immediate output + ) + + failed_tests = [] + + # Stream output line by line + if process.stdout: + for line in process.stdout: + print(line, end="") # Print immediately (streaming) + + # Parse for failures (pytest format: "test_file.py::test_name FAILED") + if " FAILED " in line: + # Extract test name from pytest output + try: + parts = line.split(" FAILED ") + if len(parts) >= 2: + # Get the test identifier (the part before " FAILED ") + # Strip whitespace and take last token (handles indentation) + test_name = parts[0].strip().split()[-1] + failed_tests.append(test_name) + except Exception: + # If parsing fails, continue - we'll still show module failed + pass + + process.wait() + + if process.returncode != 0: + all_passed = False + failed_modules[module_path] = failed_tests + print(f"✗ Module failed: {module_path}") + else: + print(f"✓ Module passed: {module_path}") + + print("\n" + "=" * 70) + if all_passed: + print("All heavy GPU modules passed!") + else: + print(f"Failed modules ({len(failed_modules)}):") + for module, tests in failed_modules.items(): + print(f" {module}:") + if tests: + for test in tests: + print(f" - {test}") + else: + print(" (module failed but couldn't parse specific test names)") + print("=" * 70 + "\n") + + return 0 if all_passed else 1 + + +# ============================================================================ +# vLLM Backend Cleanup Helper +# ============================================================================ + + +def cleanup_vllm_backend(backend): + """Best-effort cleanup of vLLM backend GPU memory. + + Note: CUDA driver holds GPU memory at process level. Only process exit + reliably releases it. 
Cross-module isolation uses separate subprocesses + (see pytest_collection_finish hook). + + Args: + backend: The vLLM backend instance to cleanup + """ + import gc + import time + + import torch + + backend._underlying_model.shutdown_background_loop() + del backend._underlying_model + del backend + gc.collect() + + if torch.cuda.is_available(): + torch.cuda.synchronize() + torch.cuda.empty_cache() + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + torch.cuda.reset_peak_memory_stats() + torch.cuda.reset_accumulated_memory_stats() + + # Cleanup NCCL process groups to suppress warnings + if torch.distributed.is_initialized(): + try: + torch.distributed.destroy_process_group() + except Exception: + # Ignore if already destroyed + pass + + for _ in range(3): + gc.collect() + torch.cuda.empty_cache() + time.sleep(1) + + +def pytest_collection_finish(session): + """After collection, check if we need heavy GPU test process isolation. + + If heavy RAM tests (HuggingFace, vLLM, etc.) are collected and there are + multiple modules, run them in separate processes and exit. + + Only activates on systems with CUDA GPUs where memory isolation is needed. + """ + # Only use process isolation on CUDA systems (not macOS/MPS) + config = session.config + ignore_gpu = config.getoption( + "--ignore-gpu-check", default=False + ) or config.getoption("--ignore-all-checks", default=False) + + # Check if we have CUDA (not just any GPU - MPS doesn't need this) + has_cuda = False + if HAS_TORCH and not ignore_gpu: + import torch + + has_cuda = torch.cuda.is_available() + + # Only use process isolation if we have CUDA GPU + if not has_cuda and not ignore_gpu: + return + + # Collect heavy RAM modules + heavy_modules = _collect_heavy_ram_modules(session) + + # Only use process isolation if multiple modules + if len(heavy_modules) <= 1: + return + + # Run modules in isolation + exit_code = _run_heavy_modules_isolated(session, heavy_modules) + + # Clear collected items so pytest doesn't run them again + session.items.clear() + + # Set flag to indicate we handled heavy tests + session.config._heavy_process_isolation = True + + # Exit with appropriate code + pytest.exit("Heavy GPU tests completed in isolated processes", returncode=exit_code) + + +# ============================================================================ +# Test Collection Filtering +# ============================================================================ + + +def pytest_collection_modifyitems(config, items): + """Skip tests at collection time based on markers. + + This prevents fixture setup errors for tests that would be skipped anyway. 
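+
+    For example, a test declared as
+
+        @pytest.mark.ollama
+        def test_chat_with_ollama():  # hypothetical test name, for illustration
+            ...
+
+    gets the skip marker added at collection time when port 11434 is not
+    listening, unless --ignore-ollama-check or --ignore-all-checks is passed.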
+ """ + capabilities = get_system_capabilities() + + # Check for override flags + ignore_all = config.getoption("--ignore-all-checks", default=False) + ignore_ollama = ( + config.getoption("--ignore-ollama-check", default=False) or ignore_all + ) + + skip_ollama = pytest.mark.skip( + reason="Ollama not available (port 11434 not listening)" + ) + + for item in items: + # Skip ollama tests if ollama not available + if item.get_closest_marker("ollama") and not ignore_ollama: + if not capabilities["has_ollama"]: + item.add_marker(skip_ollama) + + +# ============================================================================ +# Test Skipping Logic (Runtime) # ============================================================================ @@ -199,9 +451,6 @@ def pytest_runtest_setup(item): ignore_all = config.getoption("--ignore-all-checks", default=False) ignore_gpu = config.getoption("--ignore-gpu-check", default=False) or ignore_all ignore_ram = config.getoption("--ignore-ram-check", default=False) or ignore_all - ignore_ollama = ( - config.getoption("--ignore-ollama-check", default=False) or ignore_all - ) ignore_api_key = ( config.getoption("--ignore-api-key-check", default=False) or ignore_all ) @@ -257,11 +506,8 @@ def pytest_runtest_setup(item): if not capabilities["has_gpu"]: pytest.skip("Skipping test: vLLM requires GPU") - if item.get_closest_marker("ollama") and not ignore_ollama: - if not capabilities["has_ollama"]: - pytest.skip( - "Skipping test: Ollama not available (port 11434 not listening)" - ) + # Note: Ollama tests are now skipped at collection time in pytest_collection_modifyitems + # to prevent fixture setup errors def memory_cleaner():