From 711bceb129068d18777ca0a5c59324f4792e520f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 8 Oct 2024 21:51:39 -0700 Subject: [PATCH 01/10] fix: check csrf trusted origins for referer --- src/phoenix/config.py | 15 ++++++++++++++ src/phoenix/server/app.py | 31 +++++++++++++++++++++++++++++ tests/integration/auth/conftest.py | 2 ++ tests/integration/auth/test_auth.py | 23 +++++++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index 59ff26dcfe..1f5128194c 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -107,6 +107,11 @@ """ The duration, in minutes, before password reset tokens expire. """ +ENV_PHOENIX_CSRF_TRUSTED_ORIGINS = "PHOENIX_CSRF_TRUSTED_ORIGINS" +""" +A comma-separated list of origins that are allowed to bypass Cross-Site Request Forgery (CSRF) +protection. +""" # SMTP settings ENV_PHOENIX_SMTP_HOSTNAME = "PHOENIX_SMTP_HOSTNAME" @@ -321,6 +326,16 @@ def get_env_refresh_token_expiry() -> timedelta: return timedelta(minutes=minutes) +def get_env_csrf_trusted_origins() -> List[str]: + origins = [] + if csrf_trusted_origins := os.getenv(ENV_PHOENIX_CSRF_TRUSTED_ORIGINS): + for origin in csrf_trusted_origins.split(","): + if not origin: + continue + origins.append(origin) + return list(set(origins)) + + def get_env_smtp_username() -> str: return os.getenv(ENV_PHOENIX_SMTP_USERNAME) or "" diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index 2d0a0e5577..09511535fe 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -27,6 +27,7 @@ Union, cast, ) +from urllib.parse import urlparse import strawberry from fastapi import APIRouter, Depends, FastAPI @@ -42,6 +43,7 @@ from starlette.requests import Request from starlette.responses import PlainTextResponse, Response from starlette.staticfiles import StaticFiles +from starlette.status import HTTP_401_UNAUTHORIZED from starlette.templating import Jinja2Templates from starlette.types import Scope, StatefulLifespan from strawberry.extensions import SchemaExtension @@ -53,8 +55,10 @@ import phoenix.trace.v1 as pb from phoenix.config import ( DEFAULT_PROJECT_NAME, + ENV_PHOENIX_CSRF_TRUSTED_ORIGINS, SERVER_DIR, OAuth2ClientConfig, + get_env_csrf_trusted_origins, get_env_host, get_env_port, server_instrumentation_is_enabled, @@ -226,6 +230,25 @@ async def get_response(self, path: str, scope: Scope) -> Response: return response +class RequestOriginHostnameValidator(BaseHTTPMiddleware): + def __init__(self, trusted_hostnames: List[str], *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._trusted_hostnames = trusted_hostnames + + async def dispatch( + self, + request: Request, + call_next: RequestResponseEndpoint, + ) -> Response: + headers = request.headers + for key in "origin", "referer": + if not (url := headers.get(key)): + continue + if urlparse(url).hostname not in self._trusted_hostnames: + return Response(f"untrusted {key}", status_code=HTTP_401_UNAUTHORIZED) + return await call_next(request) + + class HeadersMiddleware(BaseHTTPMiddleware): async def dispatch( self, @@ -660,6 +683,14 @@ def create_app( ) last_updated_at = LastUpdatedAt() middlewares: List[Middleware] = [Middleware(HeadersMiddleware)] + if origins := get_env_csrf_trusted_origins(): + trusted_hostnames = [h for o in origins if o and (h := urlparse(o).hostname)] + middlewares.append(Middleware(RequestOriginHostnameValidator, trusted_hostnames)) + elif email_sender or oauth2_client_configs: + logger.warning( + "CSRF protection is disabled because trusted origins have not been set via " + f"the `{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}` environment variable." + ) if authentication_enabled and secret: token_store = JwtStore(db, secret) middlewares.append( diff --git a/tests/integration/auth/conftest.py b/tests/integration/auth/conftest.py index d164e547ba..4d7416d8e3 100644 --- a/tests/integration/auth/conftest.py +++ b/tests/integration/auth/conftest.py @@ -8,6 +8,7 @@ from faker import Faker from phoenix.auth import DEFAULT_SECRET_LENGTH from phoenix.config import ( + ENV_PHOENIX_CSRF_TRUSTED_ORIGINS, ENV_PHOENIX_DISABLE_RATE_LIMIT, ENV_PHOENIX_ENABLE_AUTH, ENV_PHOENIX_SECRET, @@ -52,6 +53,7 @@ def _app( (ENV_PHOENIX_SMTP_PASSWORD, "test"), (ENV_PHOENIX_SMTP_MAIL_FROM, _fake.email()), (ENV_PHOENIX_SMTP_VALIDATE_CERTS, "false"), + (ENV_PHOENIX_CSRF_TRUSTED_ORIGINS, ",http://localhost,"), ) with ExitStack() as stack: stack.enter_context(mock.patch.dict(os.environ, values)) diff --git a/tests/integration/auth/test_auth.py b/tests/integration/auth/test_auth.py index e93f21ea41..0a1608e728 100644 --- a/tests/integration/auth/test_auth.py +++ b/tests/integration/auth/test_auth.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta, timezone from functools import partial from typing import ( + Any, ContextManager, DefaultDict, Dict, @@ -53,6 +54,7 @@ _grpc_span_exporter, _Headers, _http_span_exporter, + _httpx_client, _initiate_password_reset, _log_in, _log_out, @@ -73,6 +75,27 @@ _TokenT = TypeVar("_TokenT", _AccessToken, _RefreshToken) +class TestOriginAndReferer: + @pytest.mark.parametrize( + "headers,expectation", + [ + [dict(), _OK], + [dict(origin="http://localhost"), _OK], + [dict(referer="http://localhost/xyz"), _OK], + [dict(origin="http://xyz.com"), _EXPECTATION_401], + [dict(referer="http://xyz.com/xyz"), _EXPECTATION_401], + ], + ) + def test_csrf_origin_validation( + self, + headers: Dict[str, str], + expectation: ContextManager[Any], + ) -> None: + resp = _httpx_client(headers=headers).get("/healthz") + with expectation: + resp.raise_for_status() + + class TestLogIn: @pytest.mark.parametrize("role_or_user", [_MEMBER, _ADMIN, _DEFAULT_ADMIN]) def test_can_log_in( From 658f0436441a59da64eb89afd1795f6327dc1555 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 8 Oct 2024 22:05:04 -0700 Subject: [PATCH 02/10] clean up --- pyproject.toml | 2 +- src/phoenix/config.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 463eb7a4b4..246759ff9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -195,7 +195,7 @@ dependencies = [ "litellm>=1.0.3", "openai>=1.0.0", "tenacity", - "protobuf==3.20", # version minimum (for tests) + "protobuf==3.20.2", # version minimum (for tests) "grpc-interceptor[testing]", "responses", "tiktoken", diff --git a/src/phoenix/config.py b/src/phoenix/config.py index 1f5128194c..e81323b244 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -332,6 +332,11 @@ def get_env_csrf_trusted_origins() -> List[str]: for origin in csrf_trusted_origins.split(","): if not origin: continue + if not urlparse(origin).hostname: + raise ValueError( + f"Missing hostname in `{origin}` for environment variable " + f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" + ) origins.append(origin) return list(set(origins)) From 746b4ef2323a1f54906c628f5c2c3b850e93b64b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 8 Oct 2024 22:07:55 -0700 Subject: [PATCH 03/10] clean up --- src/phoenix/config.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index e81323b244..cf0ae16303 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -328,16 +328,17 @@ def get_env_refresh_token_expiry() -> timedelta: def get_env_csrf_trusted_origins() -> List[str]: origins = [] - if csrf_trusted_origins := os.getenv(ENV_PHOENIX_CSRF_TRUSTED_ORIGINS): - for origin in csrf_trusted_origins.split(","): - if not origin: - continue - if not urlparse(origin).hostname: - raise ValueError( - f"Missing hostname in `{origin}` for environment variable " - f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" - ) - origins.append(origin) + if not (csrf_trusted_origins := os.getenv(ENV_PHOENIX_CSRF_TRUSTED_ORIGINS)): + return origins + for origin in csrf_trusted_origins.split(","): + if not origin: + continue + if not urlparse(origin).hostname: + raise ValueError( + f"Missing hostname in `{origin}` for environment variable " + f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" + ) + origins.append(origin) return list(set(origins)) From d9df4831d1d750447cab5e560ba54e8ae09d261b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:32:48 -0700 Subject: [PATCH 04/10] fix type check --- src/phoenix/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index cf0ae16303..02f205d55b 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -327,7 +327,7 @@ def get_env_refresh_token_expiry() -> timedelta: def get_env_csrf_trusted_origins() -> List[str]: - origins = [] + origins: List[str] = [] if not (csrf_trusted_origins := os.getenv(ENV_PHOENIX_CSRF_TRUSTED_ORIGINS)): return origins for origin in csrf_trusted_origins.split(","): @@ -339,7 +339,7 @@ def get_env_csrf_trusted_origins() -> List[str]: f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" ) origins.append(origin) - return list(set(origins)) + return sorted(set(origins)) def get_env_smtp_username() -> str: From 7edf64cfa69a173aac810bd0fb5bc99d996d3d61 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:43:44 -0700 Subject: [PATCH 05/10] extra test cases --- tests/integration/auth/test_auth.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/auth/test_auth.py b/tests/integration/auth/test_auth.py index 0a1608e728..b79bbe53eb 100644 --- a/tests/integration/auth/test_auth.py +++ b/tests/integration/auth/test_auth.py @@ -84,6 +84,8 @@ class TestOriginAndReferer: [dict(referer="http://localhost/xyz"), _OK], [dict(origin="http://xyz.com"), _EXPECTATION_401], [dict(referer="http://xyz.com/xyz"), _EXPECTATION_401], + [dict(origin="http://xyz.com", referer="http://localhost/xyz"), _EXPECTATION_401], + [dict(origin="http://localhost", referer="http://xyz.com/xyz"), _EXPECTATION_401], ], ) def test_csrf_origin_validation( From 182b7e9119515cc36d7c517d09f3c1c2f49c9adc Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:50:03 -0700 Subject: [PATCH 06/10] clean up --- src/phoenix/server/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index 09511535fe..c6025c0cb2 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -688,7 +688,7 @@ def create_app( middlewares.append(Middleware(RequestOriginHostnameValidator, trusted_hostnames)) elif email_sender or oauth2_client_configs: logger.warning( - "CSRF protection is disabled because trusted origins have not been set via " + "CSRF protection can be enabled by listing trusted origins via " f"the `{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}` environment variable." ) if authentication_enabled and secret: From 49cbe6a4f0d3c7cf81589ed147a59a603a96f94d Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:50:39 -0700 Subject: [PATCH 07/10] clean up --- src/phoenix/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index 02f205d55b..be648c79cf 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -335,7 +335,7 @@ def get_env_csrf_trusted_origins() -> List[str]: continue if not urlparse(origin).hostname: raise ValueError( - f"Missing hostname in `{origin}` for environment variable " + f"Invalid hostname found in in the environment variable " f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" ) origins.append(origin) From 508c2bfaf780a15b66491c40143241066f7d193b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:51:47 -0700 Subject: [PATCH 08/10] clean up --- src/phoenix/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index be648c79cf..3f0651fcb6 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -335,8 +335,8 @@ def get_env_csrf_trusted_origins() -> List[str]: continue if not urlparse(origin).hostname: raise ValueError( - f"Invalid hostname found in in the environment variable " - f"`{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}`" + f"The environment variable `{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}` contains a url " + f"with missing hostname. Please ensure that each url has a valid hostname." ) origins.append(origin) return sorted(set(origins)) From 7cf0362309a87850a59d864c97535fd61f2f7742 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 09:56:06 -0700 Subject: [PATCH 09/10] clean up messages --- src/phoenix/config.py | 2 +- src/phoenix/server/app.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index 3f0651fcb6..03957ee996 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -110,7 +110,7 @@ ENV_PHOENIX_CSRF_TRUSTED_ORIGINS = "PHOENIX_CSRF_TRUSTED_ORIGINS" """ A comma-separated list of origins that are allowed to bypass Cross-Site Request Forgery (CSRF) -protection. +protection. This is recommended when setting up OAuth2 clients or sending password reset emails. """ # SMTP settings diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index c6025c0cb2..0807f8668e 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -689,7 +689,9 @@ def create_app( elif email_sender or oauth2_client_configs: logger.warning( "CSRF protection can be enabled by listing trusted origins via " - f"the `{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}` environment variable." + f"the `{ENV_PHOENIX_CSRF_TRUSTED_ORIGINS}` environment variable. " + "This is recommended when setting up OAuth2 clients or sending " + "password reset emails." ) if authentication_enabled and secret: token_store = JwtStore(db, secret) From c84977e1bf3ca33fd80c47a08210a84810dbd9cb Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 9 Oct 2024 10:23:06 -0700 Subject: [PATCH 10/10] clarify docstring --- src/phoenix/config.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/phoenix/config.py b/src/phoenix/config.py index 03957ee996..7000dee779 100644 --- a/src/phoenix/config.py +++ b/src/phoenix/config.py @@ -109,8 +109,11 @@ """ ENV_PHOENIX_CSRF_TRUSTED_ORIGINS = "PHOENIX_CSRF_TRUSTED_ORIGINS" """ -A comma-separated list of origins that are allowed to bypass Cross-Site Request Forgery (CSRF) -protection. This is recommended when setting up OAuth2 clients or sending password reset emails. +A comma-separated list of origins allowed to bypass Cross-Site Request Forgery (CSRF) +protection. This setting is recommended when configuring OAuth2 clients or sending +password reset emails. If this variable is left unspecified or contains no origins, CSRF +protection will not be enabled. In such cases, when a request includes `origin` or `referer` +headers, those values will not be validated. """ # SMTP settings