diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 307ada611a859..c102a53214786 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -89,8 +89,8 @@ steps:
   mirror_hardwares: [amd]
 
   commands:
-  - pytest -v -s entrypoints -m llm
-  - pytest -v -s entrypoints -m openai
+  - pytest -v -s entrypoints/llm
+  - pytest -v -s entrypoints/openai
 
 - label: Examples Test
   working_dir: "/vllm-workspace/examples"
diff --git a/pyproject.toml b/pyproject.toml
index 4958aae02594a..790e013620286 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,5 @@ skip_gitignore = true
 [tool.pytest.ini_options]
 markers = [
     "skip_global_cleanup",
-    "llm: run tests for vLLM API only",
-    "openai: run tests for OpenAI API only",
     "vlm: run tests for vision language models only",
 ]
diff --git a/tests/entrypoints/llm/__init__.py b/tests/entrypoints/llm/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/test_llm_encode.py b/tests/entrypoints/llm/test_encode.py
similarity index 98%
rename from tests/entrypoints/test_llm_encode.py
rename to tests/entrypoints/llm/test_encode.py
index 7c3fbe43a8384..d1056a0490509 100644
--- a/tests/entrypoints/test_llm_encode.py
+++ b/tests/entrypoints/llm/test_encode.py
@@ -5,7 +5,7 @@
 
 from vllm import LLM, EmbeddingRequestOutput, PoolingParams
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
@@ -25,8 +25,6 @@
     [1000, 1003, 1001, 1002],
 ]
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/test_llm_generate.py b/tests/entrypoints/llm/test_generate.py
similarity index 98%
rename from tests/entrypoints/test_llm_generate.py
rename to tests/entrypoints/llm/test_generate.py
index a00fff91a310e..57ac37f7ea8f7 100644
--- a/tests/entrypoints/test_llm_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -5,7 +5,7 @@
 
 from vllm import LLM, RequestOutput, SamplingParams
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 MODEL_NAME = "facebook/opt-125m"
 
@@ -23,8 +23,6 @@
     [0, 3, 1, 2],
 ]
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/test_llm_generate_multiple_loras.py b/tests/entrypoints/llm/test_generate_multiple_loras.py
similarity index 96%
rename from tests/entrypoints/test_llm_generate_multiple_loras.py
rename to tests/entrypoints/llm/test_generate_multiple_loras.py
index 176daa472c7a8..35eabf079964a 100644
--- a/tests/entrypoints/test_llm_generate_multiple_loras.py
+++ b/tests/entrypoints/llm/test_generate_multiple_loras.py
@@ -7,7 +7,7 @@
 from vllm import LLM
 from vllm.lora.request import LoRARequest
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 
@@ -20,8 +20,6 @@
 
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/openai/__init__.py b/tests/entrypoints/openai/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/test_openai_chat.py b/tests/entrypoints/openai/test_chat.py
similarity index 99%
rename from tests/entrypoints/test_openai_chat.py
rename to tests/entrypoints/openai/test_chat.py
index 52e647170f6af..f4c0af1adfdf9 100644
--- a/tests/entrypoints/test_openai_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -14,7 +14,7 @@
 from huggingface_hub import snapshot_download
 from openai import BadRequestError
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -69,8 +69,6 @@
     "Swift", "Kotlin"
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_openai_completion.py b/tests/entrypoints/openai/test_completion.py
similarity index 99%
rename from tests/entrypoints/test_openai_completion.py
rename to tests/entrypoints/openai/test_completion.py
index da5de3666be50..b05035713d7be 100644
--- a/tests/entrypoints/test_openai_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -16,7 +16,7 @@
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -71,8 +71,6 @@
     "Swift", "Kotlin"
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_openai_embedding.py b/tests/entrypoints/openai/test_embedding.py
similarity index 97%
rename from tests/entrypoints/test_openai_embedding.py
rename to tests/entrypoints/openai/test_embedding.py
index 45f701733df0c..82a5627aa1d63 100644
--- a/tests/entrypoints/test_openai_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -2,12 +2,10 @@
 import pytest
 import ray
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def ray_ctx():
diff --git a/tests/entrypoints/test_guided_processors.py b/tests/entrypoints/openai/test_guided_processors.py
similarity index 99%
rename from tests/entrypoints/test_guided_processors.py
rename to tests/entrypoints/openai/test_guided_processors.py
index fb32a9d155bc0..27568d3e7c26c 100644
--- a/tests/entrypoints/test_guided_processors.py
+++ b/tests/entrypoints/openai/test_guided_processors.py
@@ -52,8 +52,6 @@
 TEST_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
               r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)")
 
-pytestmark = pytest.mark.openai
-
 
 def test_guided_logits_processors():
     """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/openai/test_models.py
similarity index 96%
rename from tests/entrypoints/test_openai_server.py
rename to tests/entrypoints/openai/test_models.py
index ef0d30131bf45..fddfd7550483a 100644
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/openai/test_models.py
@@ -6,7 +6,7 @@
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -14,8 +14,6 @@
 # generation quality here
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_server_oot_registration.py b/tests/entrypoints/openai/test_oot_registration.py
similarity index 97%
rename from tests/entrypoints/test_server_oot_registration.py
rename to tests/entrypoints/openai/test_oot_registration.py
index 3e55d7f4297fb..dbbda6de1fa09 100644
--- a/tests/entrypoints/test_server_oot_registration.py
+++ b/tests/entrypoints/openai/test_oot_registration.py
@@ -1,7 +1,6 @@
 import sys
 import time
 
-import pytest
 import torch
 from openai import OpenAI, OpenAIError
 
@@ -10,8 +9,6 @@
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.utils import get_open_port
 
-pytestmark = pytest.mark.openai
-
 
 class MyOPTForCausalLM(OPTForCausalLM):
 
diff --git a/tests/entrypoints/test_openai_run_batch.py b/tests/entrypoints/openai/test_run_batch.py
similarity index 100%
rename from tests/entrypoints/test_openai_run_batch.py
rename to tests/entrypoints/openai/test_run_batch.py
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index c45f02fe564a3..74b49726734b5 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -1,15 +1,11 @@
 import asyncio
 from dataclasses import dataclass
 
-import pytest
-
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 
 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
 
-pytestmark = pytest.mark.openai
-
 
 @dataclass
 class MockModelConfig:
diff --git a/tests/entrypoints/test_openai_vision.py b/tests/entrypoints/openai/test_vision.py
similarity index 96%
rename from tests/entrypoints/test_openai_vision.py
rename to tests/entrypoints/openai/test_vision.py
index df092680aaeb8..dbaaa349ad371 100644
--- a/tests/entrypoints/test_openai_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from typing import Dict, List
 
 import openai
@@ -8,12 +7,12 @@
 
 from vllm.multimodal.utils import ImageFetchAiohttp, encode_image_base64
 
-from ..utils import RemoteOpenAIServer
+from ...utils import VLLM_PATH, RemoteOpenAIServer
 
 MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
-LLAVA_CHAT_TEMPLATE = (Path(__file__).parent.parent.parent /
-                       "examples/template_llava.jinja")
+LLAVA_CHAT_TEMPLATE = VLLM_PATH / "examples/template_llava.jinja"
 assert LLAVA_CHAT_TEMPLATE.exists()
+
 # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
 TEST_IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
@@ -22,8 +21,6 @@
     "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def ray_ctx():
@@ -279,7 +276,3 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
     )
     completion = completion.choices[0].text
     assert completion is not None and len(completion) >= 0
-
-
-if __name__ == "__main__":
-    pytest.main([__file__])
diff --git a/tests/utils.py b/tests/utils.py
index 2a5f82b91c42c..09107b5e7e2b7 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,7 +4,8 @@
 import time
 import warnings
 from contextlib import contextmanager
-from typing import Dict, List
+from pathlib import Path
+from typing import Any, Dict, List
 
 import openai
 import ray
@@ -40,8 +41,8 @@ def _nvml():
             nvmlShutdown()
 
 
-# Path to root of repository so that utilities can be imported by ray workers
-VLLM_PATH = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))
+VLLM_PATH = Path(__file__).parent.parent
+"""Path to root of the vLLM repository."""
 
 
 class RemoteOpenAIServer:
@@ -153,10 +154,12 @@ def init_test_distributed_environment(
 def multi_process_parallel(
     tp_size: int,
     pp_size: int,
-    test_target,
+    test_target: Any,
 ) -> None:
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
+    # NOTE: We need to set working_dir for distributed tests,
+    # otherwise we may get import errors on ray workers
     ray.init(runtime_env={"working_dir": VLLM_PATH})
 
     distributed_init_port = get_open_port()