From 8ea71bd119e9f49fd8f54875a4215be26987dca4 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Mon, 28 Aug 2023 02:57:43 -0400 Subject: [PATCH 01/20] Reaching till GraphQLWithContext --- src/phoenix/core/umap_parameters.py | 7 +++++++ src/phoenix/server/api/context.py | 2 ++ src/phoenix/server/app.py | 6 ++++++ src/phoenix/session/session.py | 26 +++++++++++++++++++++++--- 4 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 src/phoenix/core/umap_parameters.py diff --git a/src/phoenix/core/umap_parameters.py b/src/phoenix/core/umap_parameters.py new file mode 100644 index 0000000000..1ecf4ad7de --- /dev/null +++ b/src/phoenix/core/umap_parameters.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass +from typing import Union + + +@dataclass +class UMAPParameters: + default_umap_params: dict[str, Union[int, float]] diff --git a/src/phoenix/server/api/context.py b/src/phoenix/server/api/context.py index 2d6a95be7e..d558407d97 100644 --- a/src/phoenix/server/api/context.py +++ b/src/phoenix/server/api/context.py @@ -8,6 +8,7 @@ from phoenix.core.model_schema import Model from phoenix.core.traces import Traces +from phoenix.core.umap_parameters import UMAPParameters @dataclass @@ -18,3 +19,4 @@ class Context: export_path: Path corpus: Optional[Model] = None traces: Optional[Traces] = None + config: Optional[UMAPParameters] = None diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index 3c19a8efe9..45b26ef84b 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -20,6 +20,7 @@ from phoenix.config import SERVER_DIR from phoenix.core.model_schema import Model from phoenix.core.traces import Traces +from phoenix.core.umap_parameters import UMAPParameters from .api.context import Context from .api.schema import schema @@ -67,10 +68,12 @@ def __init__( graphiql: bool = False, corpus: Optional[Model] = None, traces: Optional[Traces] = None, + config: Optional[UMAPParameters] = None, ) -> None: self.model = model self.corpus = corpus self.traces = traces + self.config = config self.export_path = export_path super().__init__(schema, graphiql=graphiql) @@ -85,6 +88,7 @@ async def get_context( model=self.model, corpus=self.corpus, traces=self.traces, + config=self.config, export_path=self.export_path, ) @@ -109,6 +113,7 @@ def create_app( model: Model, corpus: Optional[Model] = None, traces: Optional[Traces] = None, + config: Optional[UMAPParameters] = None, debug: bool = False, ) -> Starlette: graphql = GraphQLWithContext( @@ -116,6 +121,7 @@ def create_app( model=model, corpus=corpus, traces=traces, + config=config, export_path=export_path, graphiql=True, ) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index bb175c8026..373bb134d5 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -3,7 +3,7 @@ from collections import UserList from pathlib import Path from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Iterable, List, Optional, Set +from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Union import pandas as pd from portpicker import pick_unused_port @@ -11,6 +11,7 @@ from phoenix.config import PORT, get_exported_files from phoenix.core.model_schema_adapter import create_model_from_datasets from phoenix.core.traces import Traces +from phoenix.core.umap_parameters import UMAPParameters from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.server.app import create_app from phoenix.server.thread_server import ThreadServer @@ -66,12 +67,18 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, + default_umap_parameters: Optional[dict[str, int]] = None, port: int = PORT, ): self.primary_dataset = primary_dataset self.reference_dataset = reference_dataset self.corpus_dataset = corpus_dataset self.trace_dataset = trace_dataset + self.config = ( + UMAPParameters(default_umap_parameters) + if default_umap_parameters is not None + else None, + ) self.model = create_model_from_datasets( primary_dataset, reference_dataset, @@ -144,6 +151,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, + default_umap_parameters: Optional[dict[str, int]] = None, port: Optional[int] = None, ) -> None: super().__init__( @@ -151,6 +159,7 @@ def __init__( reference_dataset=reference_dataset, corpus_dataset=corpus_dataset, trace_dataset=trace_dataset, + default_umap_parameters=default_umap_parameters, port=port or PORT, ) primary_dataset.to_disc() @@ -169,6 +178,7 @@ def __init__( corpus_dataset_name=( self.corpus_dataset.name if self.corpus_dataset is not None else None ), + # TO-DO default_umap_params for ProcessSession ) @property @@ -187,6 +197,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, + default_umap_parameters: Optional[dict[str, int]] = None, port: Optional[int] = None, ): super().__init__( @@ -194,6 +205,7 @@ def __init__( reference_dataset=reference_dataset, corpus_dataset=corpus_dataset, trace_dataset=trace_dataset, + default_umap_parameters=default_umap_parameters, port=port or pick_unused_port(), ) # Initialize an app service that keeps the server running @@ -202,6 +214,7 @@ def __init__( model=self.model, corpus=self.corpus, traces=self.traces, + config=self.config, ) self.server = ThreadServer( app=self.app, @@ -224,6 +237,7 @@ def launch_app( reference: Optional[Dataset] = None, corpus: Optional[Dataset] = None, trace: Optional[TraceDataset] = None, + default_umap_parameters: Optional[dict[str, Union[int, float]]] = None, port: Optional[int] = None, run_in_thread: Optional[bool] = True, ) -> Optional[Session]: @@ -245,6 +259,8 @@ def launch_app( The port on which the server listens. run_in_thread: bool, optional, default=True Whether the server should run in a Thread or Process. + default_umap_parameters: dict[str, Union[int, float]], optional, default=None + User specified default UMAP parameters eg: {nNeighbors: 10, nSamples: 5, minDist: 0.5} Returns ------- @@ -273,7 +289,9 @@ def launch_app( "it down and starting a new instance..." ) _session.end() - _session = ThreadSession(primary, reference, corpus, trace, port=port) + _session = ThreadSession( + primary, reference, corpus, trace, default_umap_parameters, port=port + ) # TODO: catch exceptions from thread if not _session.active: logger.error( @@ -282,7 +300,9 @@ def launch_app( ) return None else: - _session = ProcessSession(primary, reference, corpus, trace, port=port) + _session = ProcessSession( + primary, reference, corpus, trace, default_umap_parameters, port=port + ) print(f"🌍 To view the Phoenix app in your browser, visit {_session.url}") print("📺 To view the Phoenix app in a notebook, run `px.active_session().view()`") From 689b6f5c8178c39efb2394c5b2b4a45235e60d77 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Mon, 28 Aug 2023 03:10:42 -0400 Subject: [PATCH 02/20] typing.Dict not dict() --- src/phoenix/core/umap_parameters.py | 2 +- src/phoenix/session/session.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/phoenix/core/umap_parameters.py b/src/phoenix/core/umap_parameters.py index 1ecf4ad7de..b1983b3592 100644 --- a/src/phoenix/core/umap_parameters.py +++ b/src/phoenix/core/umap_parameters.py @@ -4,4 +4,4 @@ @dataclass class UMAPParameters: - default_umap_params: dict[str, Union[int, float]] + default_umap_params: Dict[str, Union[int, float]] diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 373bb134d5..ae6aade019 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -3,7 +3,7 @@ from collections import UserList from pathlib import Path from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Union +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Union import pandas as pd from portpicker import pick_unused_port @@ -67,7 +67,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, int]] = None, port: int = PORT, ): self.primary_dataset = primary_dataset @@ -151,7 +151,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, int]] = None, port: Optional[int] = None, ) -> None: super().__init__( @@ -197,7 +197,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, int]] = None, port: Optional[int] = None, ): super().__init__( @@ -237,7 +237,7 @@ def launch_app( reference: Optional[Dataset] = None, corpus: Optional[Dataset] = None, trace: Optional[TraceDataset] = None, - default_umap_parameters: Optional[dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, port: Optional[int] = None, run_in_thread: Optional[bool] = True, ) -> Optional[Session]: @@ -259,7 +259,7 @@ def launch_app( The port on which the server listens. run_in_thread: bool, optional, default=True Whether the server should run in a Thread or Process. - default_umap_parameters: dict[str, Union[int, float]], optional, default=None + default_umap_parameters: Dict[str, Union[int, float]], optional, default=None User specified default UMAP parameters eg: {nNeighbors: 10, nSamples: 5, minDist: 0.5} Returns From 2834175ea9d5479235fb2c4b346898b3510b7944 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Sat, 9 Sep 2023 19:35:00 -0400 Subject: [PATCH 03/20] Dict import --- src/phoenix/core/umap_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/core/umap_parameters.py b/src/phoenix/core/umap_parameters.py index b1983b3592..e5a8e32a58 100644 --- a/src/phoenix/core/umap_parameters.py +++ b/src/phoenix/core/umap_parameters.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Union +from typing import Union, Dict @dataclass From 5e195e4461813122e368664b56fbbcd8d0333ef1 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Fri, 15 Sep 2023 11:46:13 -0400 Subject: [PATCH 04/20] Suggested review changes Added validation, moved umap_params to pointcloud and removed Optional for self.umap_parameters in context --- src/phoenix/core/umap_parameters.py | 7 --- src/phoenix/pointcloud/umap_parameters.py | 53 +++++++++++++++++++++++ src/phoenix/server/api/context.py | 4 +- src/phoenix/server/app.py | 12 ++--- src/phoenix/session/session.py | 12 ++--- 5 files changed, 65 insertions(+), 23 deletions(-) delete mode 100644 src/phoenix/core/umap_parameters.py create mode 100644 src/phoenix/pointcloud/umap_parameters.py diff --git a/src/phoenix/core/umap_parameters.py b/src/phoenix/core/umap_parameters.py deleted file mode 100644 index e5a8e32a58..0000000000 --- a/src/phoenix/core/umap_parameters.py +++ /dev/null @@ -1,7 +0,0 @@ -from dataclasses import dataclass -from typing import Union, Dict - - -@dataclass -class UMAPParameters: - default_umap_params: Dict[str, Union[int, float]] diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py new file mode 100644 index 0000000000..3a4792e51e --- /dev/null +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -0,0 +1,53 @@ +from dataclasses import dataclass + +DEFAULT_MIN_DIST = 0.0 +DEFAULT_N_NEIGHBORS = 30 +DEFAULT_N_SAMPLES = 500 + +MIN_NEIGHBORS = 5 +MAX_NEIGHBORS = 100 +MIN_SAMPLES = 1 +MAX_SAMPLES = 1000 +MIN_MIN_DIST = 0.0 +MAX_MIN_DIST = 0.99 + + +def get_umap_parameters(default_umap_parameters): + return UMAPParameters( + min_dist=default_umap_parameters.get("minDist", DEFAULT_MIN_DIST), + n_neighbors=default_umap_parameters.get("nNeighbors", DEFAULT_N_NEIGHBORS), + n_samples=default_umap_parameters.get("nSamples", DEFAULT_N_SAMPLES), + ) + + +@dataclass +class UMAPParameters: + """ + Can include any of the three keys: "minDist", "nNeighbors", "nSamples" necessary for point cloud initialization + """ + + min_dist: float + n_neighbors: int + n_samples: int + + def __post_init__(self): + if not isinstance(self.min_dist, float) or not ( + MIN_MIN_DIST <= self.min_dist <= MAX_MIN_DIST + ): + raise ValueError( + f"minDist should be of type float and must be between {MIN_MIN_DIST} and {MAX_MIN_DIST}" + ) + + if not isinstance(self.n_neighbors, int) or not ( + MIN_NEIGHBORS <= self.n_neighbors <= MAX_NEIGHBORS + ): + raise ValueError( + f"nNeighbors should be of type int and must be between {MIN_NEIGHBORS} and {MAX_NEIGHBORS}" + ) + + if not isinstance(self.n_samples, int) or not ( + MIN_SAMPLES <= self.n_samples <= MAX_SAMPLES + ): + raise ValueError( + f"nSamples should be of type int and must be between {MIN_SAMPLES} and {MAX_SAMPLES}" + ) diff --git a/src/phoenix/server/api/context.py b/src/phoenix/server/api/context.py index d558407d97..741aea09df 100644 --- a/src/phoenix/server/api/context.py +++ b/src/phoenix/server/api/context.py @@ -8,7 +8,7 @@ from phoenix.core.model_schema import Model from phoenix.core.traces import Traces -from phoenix.core.umap_parameters import UMAPParameters +from phoenix.pointcloud.umap_parameters import UMAPParameters @dataclass @@ -19,4 +19,4 @@ class Context: export_path: Path corpus: Optional[Model] = None traces: Optional[Traces] = None - config: Optional[UMAPParameters] = None + umap_parameters: UMAPParameters diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index 00b4f9449d..0e00d98db1 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -20,7 +20,7 @@ from phoenix.config import SERVER_DIR from phoenix.core.model_schema import Model from phoenix.core.traces import Traces -from phoenix.core.umap_parameters import UMAPParameters +from phoenix.pointcloud.umap_parameters import UMAPParameters from phoenix.server.api.context import Context from phoenix.server.api.schema import schema from phoenix.server.span_handler import SpanHandler @@ -66,15 +66,15 @@ def __init__( schema: BaseSchema, model: Model, export_path: Path, + umap_parameters: UMAPParameters, graphiql: bool = False, corpus: Optional[Model] = None, traces: Optional[Traces] = None, - config: Optional[UMAPParameters] = None, ) -> None: self.model = model self.corpus = corpus self.traces = traces - self.config = config + self.umap_parameters = umap_parameters self.export_path = export_path super().__init__(schema, graphiql=graphiql) @@ -89,7 +89,7 @@ async def get_context( model=self.model, corpus=self.corpus, traces=self.traces, - config=self.config, + umap_parameters=self.umap_parameters, export_path=self.export_path, ) @@ -112,9 +112,9 @@ async def get(self, request: Request) -> FileResponse: def create_app( export_path: Path, model: Model, + umap_parameters: UMAPParameters, corpus: Optional[Model] = None, traces: Optional[Traces] = None, - config: Optional[UMAPParameters] = None, debug: bool = False, ) -> Starlette: graphql = GraphQLWithContext( @@ -122,7 +122,7 @@ def create_app( model=model, corpus=corpus, traces=traces, - config=config, + umap_parameters=umap_parameters, export_path=export_path, graphiql=True, ) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index aeace43e9b..65155a66a2 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -12,7 +12,7 @@ from phoenix.config import get_env_host, get_env_port, get_exported_files from phoenix.core.model_schema_adapter import create_model_from_datasets from phoenix.core.traces import Traces -from phoenix.core.umap_parameters import UMAPParameters +from phoenix.pointcloud.umap_parameters import get_umap_parameters from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.server.app import create_app from phoenix.server.thread_server import ThreadServer @@ -78,11 +78,7 @@ def __init__( self.reference_dataset = reference_dataset self.corpus_dataset = corpus_dataset self.trace_dataset = trace_dataset - self.config = ( - UMAPParameters(default_umap_parameters) - if default_umap_parameters is not None - else None, - ) + self.umap_parameters = get_umap_parameters(default_umap_parameters) self.model = create_model_from_datasets( primary_dataset, reference_dataset, @@ -254,7 +250,7 @@ def __init__( model=self.model, corpus=self.corpus, traces=self.traces, - config=self.config, + umap_parameters=self.umap_parameters, ) self.server = ThreadServer( app=self.app, @@ -342,7 +338,7 @@ def launch_app( # TODO: catch exceptions from thread else: _session = ProcessSession( - primary, reference, corpus, trace, default_umap_parameters, host=host, port=port + primary, reference, corpus, trace, default_umap_parameters, host=host, port=port ) if not _session.active: From b71de1a8f300ac90b544084211ea35c2ea760c0f Mon Sep 17 00:00:00 2001 From: Pranav Badhe Date: Mon, 25 Sep 2023 22:29:28 -0400 Subject: [PATCH 05/20] CamelCase -> SnakeCase Co-authored-by: Mikyo King --- src/phoenix/session/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 69880c0b92..33a4a17d96 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -302,7 +302,7 @@ def launch_app( run_in_thread: bool, optional, default=True Whether the server should run in a Thread or Process. default_umap_parameters: Dict[str, Union[int, float]], optional, default=None - User specified default UMAP parameters eg: {nNeighbors: 10, nSamples: 5, minDist: 0.5} + User specified default UMAP parameters eg: {n_neighbors: 10, n_samples: 5, min_dist: 0.5} Returns ------- From e1f9304152a7ab2aa80c8d671ebe3142a203fad8 Mon Sep 17 00:00:00 2001 From: Pranav Badhe Date: Mon, 25 Sep 2023 22:29:55 -0400 Subject: [PATCH 06/20] CamelCase -> SnakeCase all suggested changes Co-authored-by: Mikyo King --- src/phoenix/pointcloud/umap_parameters.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index 3a4792e51e..dc83fe853f 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -14,9 +14,10 @@ def get_umap_parameters(default_umap_parameters): return UMAPParameters( - min_dist=default_umap_parameters.get("minDist", DEFAULT_MIN_DIST), - n_neighbors=default_umap_parameters.get("nNeighbors", DEFAULT_N_NEIGHBORS), - n_samples=default_umap_parameters.get("nSamples", DEFAULT_N_SAMPLES), + min_dist=default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST), + n_neighbors=default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS), + n_samples=default_umap_parameters.get("n_samples", DEFAULT_N_SAMPLES), + ) From 91f5f47035c27d23295d777dfdca39fe5dc5f957 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Wed, 27 Sep 2023 03:03:58 -0400 Subject: [PATCH 07/20] Default UMAPparam changes Removed hardcoded defaults for single source of truth. --- app/src/constants/pointCloudConstants.tsx | 7 ++---- app/src/store/pointCloudStore.ts | 12 +++------ app/src/window.d.ts | 12 +++++++++ src/phoenix/pointcloud/umap_parameters.py | 30 ++++++++++------------- src/phoenix/server/api/context.py | 2 -- src/phoenix/server/app.py | 14 +++++++---- src/phoenix/server/templates/index.html | 3 +++ src/phoenix/session/session.py | 9 ++++--- 8 files changed, 47 insertions(+), 42 deletions(-) create mode 100644 app/src/window.d.ts diff --git a/app/src/constants/pointCloudConstants.tsx b/app/src/constants/pointCloudConstants.tsx index 9618306309..8031ec1ad1 100644 --- a/app/src/constants/pointCloudConstants.tsx +++ b/app/src/constants/pointCloudConstants.tsx @@ -2,17 +2,14 @@ * UMAP configuration parameters * @src: https://umap-learn.readthedocs.io/en/latest/parameters.html */ -export const DEFAULT_N_COMPONENTS = 3; -export const DEFAULT_N_NEIGHBORS = 30; export const MIN_N_NEIGHBORS = 5; export const MAX_N_NEIGHBORS = 100; -export const DEFAULT_MIN_DIST = 0.0; export const MIN_MIN_DIST = 0.0; export const MAX_MIN_DIST = 0.99; /** - * The default sample size for a single dataset for UMAP, if a primary and reference dataset are requested, the entire cloud will be twice this number + * The default sample size is passed as Config from server for a single dataset for UMAP, if a + * primary and reference dataset are requested, the entire cloud will be twice this number */ -export const DEFAULT_DATASET_SAMPLE_SIZE = 500; export const MIN_DATASET_SAMPLE_SIZE = 300; export const MAX_DATASET_SAMPLE_SIZE = 100000; diff --git a/app/src/store/pointCloudStore.ts b/app/src/store/pointCloudStore.ts index e84132a21a..e2a1e328bf 100644 --- a/app/src/store/pointCloudStore.ts +++ b/app/src/store/pointCloudStore.ts @@ -12,10 +12,7 @@ import { DEFAULT_CLUSTER_MIN_SAMPLES, DEFAULT_CLUSTER_SELECTION_EPSILON, DEFAULT_COLOR_SCHEME, - DEFAULT_DATASET_SAMPLE_SIZE, DEFAULT_MIN_CLUSTER_SIZE, - DEFAULT_MIN_DIST, - DEFAULT_N_NEIGHBORS, FALLBACK_COLOR, SelectionDisplay, SelectionGridSize, @@ -69,17 +66,14 @@ type DimensionMetadata = { export type UMAPParameters = { /** * Minimum distance between points in the eUMAP projection - * @default 0 */ minDist: number; /** * The number of neighbors to require for the UMAP projection - * @default 30 */ nNeighbors: number; /** * The number of samples to use for the UMAP projection. The sample number is per dataset. - * @default 500 */ nSamples: number; }; @@ -605,9 +599,9 @@ export const createPointCloudStore = (initProps?: Partial) => { dimension: null, dimensionMetadata: null, umapParameters: { - minDist: DEFAULT_MIN_DIST, - nNeighbors: DEFAULT_N_NEIGHBORS, - nSamples: DEFAULT_DATASET_SAMPLE_SIZE, + minDist: window.Config.minDist, + nNeighbors: window.Config.nNeighbors, + nSamples: window.Config.nSamples, }, hdbscanParameters: { minClusterSize: DEFAULT_MIN_CLUSTER_SIZE, diff --git a/app/src/window.d.ts b/app/src/window.d.ts new file mode 100644 index 0000000000..d730b4670d --- /dev/null +++ b/app/src/window.d.ts @@ -0,0 +1,12 @@ +export {}; + +declare global { + interface Window { + Config: { + hasCorpus: boolean; + minDist: number; + nNeighbors: number; + nSamples: number; + }; + } +} diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index dc83fe853f..d5bc5cf893 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Dict, Union DEFAULT_MIN_DIST = 0.0 DEFAULT_N_NEIGHBORS = 30 @@ -12,43 +13,38 @@ MAX_MIN_DIST = 0.99 -def get_umap_parameters(default_umap_parameters): - return UMAPParameters( - min_dist=default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST), - n_neighbors=default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS), - n_samples=default_umap_parameters.get("n_samples", DEFAULT_N_SAMPLES), - - ) - - @dataclass class UMAPParameters: - """ - Can include any of the three keys: "minDist", "nNeighbors", "nSamples" necessary for point cloud initialization - """ - min_dist: float n_neighbors: int n_samples: int - def __post_init__(self): + def __post_init__(self) -> None: if not isinstance(self.min_dist, float) or not ( MIN_MIN_DIST <= self.min_dist <= MAX_MIN_DIST ): raise ValueError( - f"minDist should be of type float and must be between {MIN_MIN_DIST} and {MAX_MIN_DIST}" + f"minDist must be float type, and between {MIN_MIN_DIST} and {MAX_MIN_DIST}" ) if not isinstance(self.n_neighbors, int) or not ( MIN_NEIGHBORS <= self.n_neighbors <= MAX_NEIGHBORS ): raise ValueError( - f"nNeighbors should be of type int and must be between {MIN_NEIGHBORS} and {MAX_NEIGHBORS}" + f"nNeighbors must be int type, and between {MIN_NEIGHBORS} and {MAX_NEIGHBORS}" ) if not isinstance(self.n_samples, int) or not ( MIN_SAMPLES <= self.n_samples <= MAX_SAMPLES ): raise ValueError( - f"nSamples should be of type int and must be between {MIN_SAMPLES} and {MAX_SAMPLES}" + f"nSamples must be int type, and between {MIN_SAMPLES} and {MAX_SAMPLES}" ) + + +def get_umap_parameters(default_umap_parameters: Dict[str, Union[int, float]]) -> UMAPParameters: + return UMAPParameters( + min_dist=default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST), + n_neighbors=default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS), + n_samples=default_umap_parameters.get("n_samples", DEFAULT_N_SAMPLES), + ) diff --git a/src/phoenix/server/api/context.py b/src/phoenix/server/api/context.py index 741aea09df..2d6a95be7e 100644 --- a/src/phoenix/server/api/context.py +++ b/src/phoenix/server/api/context.py @@ -8,7 +8,6 @@ from phoenix.core.model_schema import Model from phoenix.core.traces import Traces -from phoenix.pointcloud.umap_parameters import UMAPParameters @dataclass @@ -19,4 +18,3 @@ class Context: export_path: Path corpus: Optional[Model] = None traces: Optional[Traces] = None - umap_parameters: UMAPParameters diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index eb57c7d599..990efaa2e9 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -26,7 +26,6 @@ from phoenix.server.api.schema import schema from phoenix.server.span_handler import SpanHandler - logger = logging.getLogger(__name__) templates = Jinja2Templates(directory=SERVER_DIR / "templates") @@ -34,6 +33,9 @@ class AppConfig(NamedTuple): has_corpus: bool + min_dist: float + n_neighbors: int + n_samples: int class Static(StaticFiles): @@ -59,6 +61,9 @@ async def get_response(self, path: str, scope: Scope) -> Response: "index.html", context={ "has_corpus": self._app_config.has_corpus, + "min_dist": self._app_config.min_dist, + "n_neighbors": self._app_config.n_neighbors, + "n_samples": self._app_config.n_samples, "request": Request(scope), }, ) @@ -84,7 +89,6 @@ def __init__( schema: BaseSchema, model: Model, export_path: Path, - umap_parameters: UMAPParameters, graphiql: bool = False, corpus: Optional[Model] = None, traces: Optional[Traces] = None, @@ -92,7 +96,6 @@ def __init__( self.model = model self.corpus = corpus self.traces = traces - self.umap_parameters = umap_parameters self.export_path = export_path super().__init__(schema, graphiql=graphiql) @@ -107,7 +110,6 @@ async def get_context( model=self.model, corpus=self.corpus, traces=self.traces, - umap_parameters=self.umap_parameters, export_path=self.export_path, ) @@ -140,7 +142,6 @@ def create_app( model=model, corpus=corpus, traces=traces, - umap_parameters=umap_parameters, export_path=export_path, graphiql=True, ) @@ -183,6 +184,9 @@ def create_app( directory=SERVER_DIR / "static", app_config=AppConfig( has_corpus=corpus is not None, + min_dist=umap_parameters.min_dist, + n_neighbors=umap_parameters.n_neighbors, + n_samples=umap_parameters.n_samples, ), ), name="static", diff --git a/src/phoenix/server/templates/index.html b/src/phoenix/server/templates/index.html index 76e785ecc2..9ecbec9ae8 100644 --- a/src/phoenix/server/templates/index.html +++ b/src/phoenix/server/templates/index.html @@ -23,6 +23,9 @@ // injected into the client before React runs value: Object.freeze({ hasCorpus: Boolean("{{has_corpus}}"), + minDist: parseFloat("{{min_dist}}"), + nNeighbors: parseInt("{{n_neighbors}}"), + nSamples: parseInt("{{n_samples}}"), }), writable: false }); diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 33a4a17d96..3874a87170 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -12,8 +12,8 @@ from phoenix.config import get_env_host, get_env_port, get_exported_files from phoenix.core.model_schema_adapter import create_model_from_datasets from phoenix.core.traces import Traces -from phoenix.pointcloud.umap_parameters import get_umap_parameters from phoenix.datasets.dataset import EMPTY_DATASET, Dataset +from phoenix.pointcloud.umap_parameters import get_umap_parameters from phoenix.server.app import create_app from phoenix.server.thread_server import ThreadServer from phoenix.services import AppService @@ -70,7 +70,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, host: Optional[str] = None, port: Optional[int] = None, ): @@ -274,7 +274,7 @@ def launch_app( reference: Optional[Dataset] = None, corpus: Optional[Dataset] = None, trace: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = {}, host: Optional[str] = None, port: Optional[int] = None, run_in_thread: bool = True, @@ -302,7 +302,8 @@ def launch_app( run_in_thread: bool, optional, default=True Whether the server should run in a Thread or Process. default_umap_parameters: Dict[str, Union[int, float]], optional, default=None - User specified default UMAP parameters eg: {n_neighbors: 10, n_samples: 5, min_dist: 0.5} + User specified default UMAP parameters + eg: {"n_neighbors": 10, "n_samples": 5, "min_dist": 0.5} Returns ------- From eb16a0d558fe1d8efbb346e216d323f783997f75 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Fri, 29 Sep 2023 11:48:55 -0400 Subject: [PATCH 08/20] typing hints & process_session() Process_session() error. Logs at -> https://textdoc.co/maM9oCZwexUA4EWQ --- src/phoenix/pointcloud/umap_parameters.py | 14 +++++++++----- src/phoenix/server/app.py | 8 ++++---- src/phoenix/server/main.py | 5 +++++ src/phoenix/services.py | 4 ++++ src/phoenix/session/session.py | 15 ++++++++++----- 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index d5bc5cf893..e02c471fe6 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, Union +from typing import Dict, Optional, Union DEFAULT_MIN_DIST = 0.0 DEFAULT_N_NEIGHBORS = 30 @@ -42,9 +42,13 @@ def __post_init__(self) -> None: ) -def get_umap_parameters(default_umap_parameters: Dict[str, Union[int, float]]) -> UMAPParameters: +def get_umap_parameters( + default_umap_parameters: Optional[Dict[str, Union[int, float]]] +) -> UMAPParameters: + if not default_umap_parameters: + default_umap_parameters = dict() return UMAPParameters( - min_dist=default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST), - n_neighbors=default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS), - n_samples=default_umap_parameters.get("n_samples", DEFAULT_N_SAMPLES), + min_dist=float(default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST)), + n_neighbors=int(default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS)), + n_samples=int(default_umap_parameters.get("n_samples", DEFAULT_N_SAMPLES)), ) diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index 990efaa2e9..bfbb114cab 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -132,7 +132,7 @@ async def get(self, request: Request) -> FileResponse: def create_app( export_path: Path, model: Model, - umap_parameters: UMAPParameters, + umap_params: UMAPParameters, corpus: Optional[Model] = None, traces: Optional[Traces] = None, debug: bool = False, @@ -184,9 +184,9 @@ def create_app( directory=SERVER_DIR / "static", app_config=AppConfig( has_corpus=corpus is not None, - min_dist=umap_parameters.min_dist, - n_neighbors=umap_parameters.n_neighbors, - n_samples=umap_parameters.n_samples, + min_dist=umap_params.min_dist, + n_neighbors=umap_params.n_neighbors, + n_samples=umap_params.n_samples, ), ), name="static", diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index 42a0207860..f47b11e78b 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -14,6 +14,7 @@ from phoenix.core.traces import Traces from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.datasets.fixtures import FIXTURES, get_datasets +from phoenix.pointcloud.umap_parameters import UMAPParameters from phoenix.server.app import create_app from phoenix.trace.fixtures import ( TRACES_FIXTURES, @@ -63,6 +64,7 @@ def _get_pid_file() -> Path: parser.add_argument("--host", type=str, required=False) parser.add_argument("--port", type=int, required=False) parser.add_argument("--no-internet", action="store_true") + parser.add_argument("--umap_params", type=int, required=True) parser.add_argument("--debug", action="store_false") # TODO: Disable before public launch subparsers = parser.add_subparsers(dest="command", required=True) datasets_parser = subparsers.add_parser("datasets") @@ -120,9 +122,12 @@ def _get_pid_file() -> Path: ), ): traces.put(span) + umap_params_list = args.umap_params.split(",") + umap_params = UMAPParameters(umap_params_list[0], umap_params_list[1], umap_params_list[2]) app = create_app( export_path=export_path, model=model, + umap_params=umap_params, traces=traces, corpus=None if corpus_dataset is None else create_model_from_datasets(corpus_dataset), debug=args.debug, diff --git a/src/phoenix/services.py b/src/phoenix/services.py index 83ac99fbd2..1b4c7544c5 100644 --- a/src/phoenix/services.py +++ b/src/phoenix/services.py @@ -111,6 +111,7 @@ def __init__( host: str, port: int, primary_dataset_name: str, + umap_params: str, reference_dataset_name: Optional[str], corpus_dataset_name: Optional[str], trace_dataset_name: Optional[str], @@ -119,6 +120,7 @@ def __init__( self.host = host self.port = port self.__primary_dataset_name = primary_dataset_name + self.__umap_params = umap_params self.__reference_dataset_name = reference_dataset_name self.__corpus_dataset_name = corpus_dataset_name self.__trace_dataset_name = trace_dataset_name @@ -138,6 +140,8 @@ def command(self) -> List[str]: "datasets", "--primary", str(self.__primary_dataset_name), + "--umap_params", + self.__umap_params, ] if self.__reference_dataset_name is not None: command.extend(["--reference", str(self.__reference_dataset_name)]) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index edff257a8c..983f13d604 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -177,7 +177,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, host: Optional[str] = None, port: Optional[int] = None, ) -> None: @@ -197,19 +197,24 @@ def __init__( corpus_dataset.to_disc() if isinstance(trace_dataset, TraceDataset): trace_dataset.to_disc() + umap_params_str = ( + f"{self.umap_parameters.min_dist}," + f"{self.umap_parameters.n_neighbors}," + f"{self.umap_parameters.n_samples}" + ) # Initialize an app service that keeps the server running self.app_service = AppService( self.export_path, self.host, self.port, self.primary_dataset.name, + umap_params_str, reference_dataset_name=( self.reference_dataset.name if self.reference_dataset is not None else None ), corpus_dataset_name=( self.corpus_dataset.name if self.corpus_dataset is not None else None ), - # TO-DO default_umap_params for ProcessSession trace_dataset_name=( self.trace_dataset.name if self.trace_dataset is not None else None ), @@ -231,7 +236,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, int]] = None, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, host: Optional[str] = None, port: Optional[int] = None, ): @@ -250,7 +255,7 @@ def __init__( model=self.model, corpus=self.corpus, traces=self.traces, - umap_parameters=self.umap_parameters, + umap_params=self.umap_parameters, ) self.server = ThreadServer( app=self.app, @@ -274,7 +279,7 @@ def launch_app( reference: Optional[Dataset] = None, corpus: Optional[Dataset] = None, trace: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = {}, + default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, host: Optional[str] = None, port: Optional[int] = None, run_in_thread: bool = True, From 0be3164c53fc46079172b6c9865edae974a40169 Mon Sep 17 00:00:00 2001 From: pbadhe Date: Fri, 29 Sep 2023 16:45:32 -0400 Subject: [PATCH 09/20] Prettier check I'd installed the pre-commit hooks, but "black ." was taking too long and checking venv lib packages, so I was doing it manually just on src/*. --- app/src/window.d.ts | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/app/src/window.d.ts b/app/src/window.d.ts index d730b4670d..1328ee84db 100644 --- a/app/src/window.d.ts +++ b/app/src/window.d.ts @@ -1,12 +1,12 @@ -export {}; - -declare global { - interface Window { - Config: { - hasCorpus: boolean; - minDist: number; - nNeighbors: number; - nSamples: number; - }; - } -} +export {}; + +declare global { + interface Window { + Config: { + hasCorpus: boolean; + minDist: number; + nNeighbors: number; + nSamples: number; + }; + } +} From 08f3a57c08e67b9fdc42135d0eebc95c2226886b Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 14:37:34 -0600 Subject: [PATCH 10/20] Adjust frontend --- app/src/constants/pointCloudConstants.tsx | 7 +++++-- app/src/pages/embedding/EmbeddingPage.tsx | 18 +++++++++++++++--- app/src/store/pointCloudStore.ts | 9 ++++++--- app/src/window.d.ts | 8 +++++--- src/phoenix/server/templates/index.html | 8 +++++--- 5 files changed, 36 insertions(+), 14 deletions(-) diff --git a/app/src/constants/pointCloudConstants.tsx b/app/src/constants/pointCloudConstants.tsx index 8031ec1ad1..9618306309 100644 --- a/app/src/constants/pointCloudConstants.tsx +++ b/app/src/constants/pointCloudConstants.tsx @@ -2,14 +2,17 @@ * UMAP configuration parameters * @src: https://umap-learn.readthedocs.io/en/latest/parameters.html */ +export const DEFAULT_N_COMPONENTS = 3; +export const DEFAULT_N_NEIGHBORS = 30; export const MIN_N_NEIGHBORS = 5; export const MAX_N_NEIGHBORS = 100; +export const DEFAULT_MIN_DIST = 0.0; export const MIN_MIN_DIST = 0.0; export const MAX_MIN_DIST = 0.99; /** - * The default sample size is passed as Config from server for a single dataset for UMAP, if a - * primary and reference dataset are requested, the entire cloud will be twice this number + * The default sample size for a single dataset for UMAP, if a primary and reference dataset are requested, the entire cloud will be twice this number */ +export const DEFAULT_DATASET_SAMPLE_SIZE = 500; export const MIN_DATASET_SAMPLE_SIZE = 300; export const MAX_DATASET_SAMPLE_SIZE = 100000; diff --git a/app/src/pages/embedding/EmbeddingPage.tsx b/app/src/pages/embedding/EmbeddingPage.tsx index fd788e9cfb..9d567d58c9 100644 --- a/app/src/pages/embedding/EmbeddingPage.tsx +++ b/app/src/pages/embedding/EmbeddingPage.tsx @@ -223,14 +223,26 @@ export function EmbeddingPage() { const { timeRange } = useTimeRange(); // Initialize the store based on whether or not there is a reference dataset const defaultPointCloudProps = useMemo>(() => { + let defaultPointCloudProps: Partial = {}; if (corpusDataset != null) { // If there is a corpus dataset, then initialize the page with the retrieval troubleshooting settings // TODO - this does make a bit of a leap of assumptions but is a short term solution in order to get the page working as intended - return DEFAULT_RETRIEVAL_TROUBLESHOOTING_POINT_CLOUD_PROPS; + defaultPointCloudProps = + DEFAULT_RETRIEVAL_TROUBLESHOOTING_POINT_CLOUD_PROPS; } else if (referenceDataset != null) { - return DEFAULT_DRIFT_POINT_CLOUD_PROPS; + defaultPointCloudProps = DEFAULT_DRIFT_POINT_CLOUD_PROPS; + } else { + defaultPointCloudProps = DEFAULT_SINGLE_DATASET_POINT_CLOUD_PROPS; } - return DEFAULT_SINGLE_DATASET_POINT_CLOUD_PROPS; + + // Apply the UMAP parameters from the server-sent config + defaultPointCloudProps = { + ...defaultPointCloudProps, + umapParameters: { + ...window.Config.UMAP, + }, + }; + return defaultPointCloudProps; }, [corpusDataset, referenceDataset]); return ( diff --git a/app/src/store/pointCloudStore.ts b/app/src/store/pointCloudStore.ts index e2a1e328bf..52481045ae 100644 --- a/app/src/store/pointCloudStore.ts +++ b/app/src/store/pointCloudStore.ts @@ -12,7 +12,10 @@ import { DEFAULT_CLUSTER_MIN_SAMPLES, DEFAULT_CLUSTER_SELECTION_EPSILON, DEFAULT_COLOR_SCHEME, + DEFAULT_DATASET_SAMPLE_SIZE, DEFAULT_MIN_CLUSTER_SIZE, + DEFAULT_MIN_DIST, + DEFAULT_N_NEIGHBORS, FALLBACK_COLOR, SelectionDisplay, SelectionGridSize, @@ -599,9 +602,9 @@ export const createPointCloudStore = (initProps?: Partial) => { dimension: null, dimensionMetadata: null, umapParameters: { - minDist: window.Config.minDist, - nNeighbors: window.Config.nNeighbors, - nSamples: window.Config.nSamples, + minDist: DEFAULT_MIN_DIST, + nNeighbors: DEFAULT_N_NEIGHBORS, + nSamples: DEFAULT_DATASET_SAMPLE_SIZE, }, hdbscanParameters: { minClusterSize: DEFAULT_MIN_CLUSTER_SIZE, diff --git a/app/src/window.d.ts b/app/src/window.d.ts index 1328ee84db..fc7e2f4008 100644 --- a/app/src/window.d.ts +++ b/app/src/window.d.ts @@ -4,9 +4,11 @@ declare global { interface Window { Config: { hasCorpus: boolean; - minDist: number; - nNeighbors: number; - nSamples: number; + UMAP: { + minDist: number; + nNeighbors: number; + nSamples: number; + }; }; } } diff --git a/src/phoenix/server/templates/index.html b/src/phoenix/server/templates/index.html index 9ecbec9ae8..16784b246e 100644 --- a/src/phoenix/server/templates/index.html +++ b/src/phoenix/server/templates/index.html @@ -23,9 +23,11 @@ // injected into the client before React runs value: Object.freeze({ hasCorpus: Boolean("{{has_corpus}}"), - minDist: parseFloat("{{min_dist}}"), - nNeighbors: parseInt("{{n_neighbors}}"), - nSamples: parseInt("{{n_samples}}"), + UMAP: { + minDist: parseFloat("{{min_dist}}"), + nNeighbors: parseInt("{{n_neighbors}}"), + nSamples: parseInt("{{n_samples}}"), + } }), writable: false }); From 4975122df1d536c0cc3fa0b140c971214649b06f Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 15:10:11 -0600 Subject: [PATCH 11/20] parse string correctly --- app/package.json | 4 ++-- src/phoenix/server/main.py | 23 +++++++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/app/package.json b/app/package.json index d9ca3adfca..28dc2a0737 100644 --- a/app/package.json +++ b/app/package.json @@ -74,8 +74,8 @@ "build:relay": "relay-compiler", "watch": "./esbuild.config.mjs dev", "test": "jest --config ./jest.config.js", - "dev": "npm run dev:server:traces:llama_index_rag & npm run build:static && npm run watch", - "dev:server:mnist": "python3 -m phoenix.server.main fixture fashion_mnist", + "dev": "npm run dev:server:mnist & npm run build:static && npm run watch", + "dev:server:mnist": "python3 -m phoenix.server.main --umap_params 0,30,550 fixture fashion_mnist", "dev:server:mnist:single": "python3 -m phoenix.server.main fixture fashion_mnist --primary-only true", "dev:server:sentiment": "python3 -m phoenix.server.main fixture sentiment_classification_language_drift", "dev:server:image": "python3 -m phoenix.server.main fixture image_classification", diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index f47b11e78b..eb3405de5d 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -14,7 +14,12 @@ from phoenix.core.traces import Traces from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.datasets.fixtures import FIXTURES, get_datasets -from phoenix.pointcloud.umap_parameters import UMAPParameters +from phoenix.pointcloud.umap_parameters import ( + DEFAULT_MIN_DIST, + DEFAULT_N_NEIGHBORS, + DEFAULT_N_SAMPLES, + UMAPParameters, +) from phoenix.server.app import create_app from phoenix.trace.fixtures import ( TRACES_FIXTURES, @@ -47,6 +52,8 @@ def _get_pid_file() -> Path: return get_pids_path() / str(os.getpid()) +DEFAULT_UMAP_PARAMS_STR = f"{DEFAULT_MIN_DIST},{DEFAULT_N_NEIGHBORS},{DEFAULT_N_SAMPLES}" + if __name__ == "__main__": primary_dataset_name: str reference_dataset_name: Optional[str] @@ -64,8 +71,8 @@ def _get_pid_file() -> Path: parser.add_argument("--host", type=str, required=False) parser.add_argument("--port", type=int, required=False) parser.add_argument("--no-internet", action="store_true") - parser.add_argument("--umap_params", type=int, required=True) - parser.add_argument("--debug", action="store_false") # TODO: Disable before public launch + parser.add_argument("--umap_params", type=str, required=False, default=DEFAULT_UMAP_PARAMS_STR) + parser.add_argument("--debug", action="store_false") subparsers = parser.add_subparsers(dest="command", required=True) datasets_parser = subparsers.add_parser("datasets") datasets_parser.add_argument("--primary", type=str, required=True) @@ -123,7 +130,10 @@ def _get_pid_file() -> Path: ): traces.put(span) umap_params_list = args.umap_params.split(",") - umap_params = UMAPParameters(umap_params_list[0], umap_params_list[1], umap_params_list[2]) + umap_params = UMAPParameters( + float(umap_params_list[0]), int(umap_params_list[1]), int(umap_params_list[2]) + ) + logger.info(f"Server umap params: {umap_params}") app = create_app( export_path=export_path, model=model, @@ -137,3 +147,8 @@ def _get_pid_file() -> Path: server = Server(config=Config(app, host=host, port=port)) Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start() server.run() + host = args.host or get_env_host() + port = args.port or get_env_port() + server = Server(config=Config(app, host=host, port=port)) + Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start() + server.run() From aa2fb8212365f5e12b2cac67f89606e294f2ee7b Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 15:13:02 -0600 Subject: [PATCH 12/20] fix duplicate lines --- src/phoenix/server/main.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index eb3405de5d..076e13ba04 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -147,8 +147,3 @@ def _get_pid_file() -> Path: server = Server(config=Config(app, host=host, port=port)) Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start() server.run() - host = args.host or get_env_host() - port = args.port or get_env_port() - server = Server(config=Config(app, host=host, port=port)) - Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start() - server.run() From 7687b3bbba23167b20e87373b10f2123a3bf1666 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:00:25 -0600 Subject: [PATCH 13/20] Update umap_parameters.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/pointcloud/umap_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index e02c471fe6..d9ef0f8917 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -38,7 +38,7 @@ def __post_init__(self) -> None: MIN_SAMPLES <= self.n_samples <= MAX_SAMPLES ): raise ValueError( - f"nSamples must be int type, and between {MIN_SAMPLES} and {MAX_SAMPLES}" + f"nSamples must be int type, and between {MIN_SAMPLES} and {MAX_SAMPLES}" ) From 7aa84a669a3ad29e24aeb1dd648bf36b3bc77a8f Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:00:49 -0600 Subject: [PATCH 14/20] Update umap_parameters.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/pointcloud/umap_parameters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index d9ef0f8917..24b30477fe 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -15,9 +15,9 @@ @dataclass class UMAPParameters: - min_dist: float - n_neighbors: int - n_samples: int + min_dist: float = DEFAULT_MIN_DIST + n_neighbors: int = DEFAULT_N_NEIGHBORS + n_samples: int = DEFAULT_N_SAMPLES def __post_init__(self) -> None: if not isinstance(self.min_dist, float) or not ( From 83f6ea5b88a85dd498911bfae2c962f4521a27d8 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:01:01 -0600 Subject: [PATCH 15/20] Update umap_parameters.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/pointcloud/umap_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index 24b30477fe..c13db8c176 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -46,7 +46,7 @@ def get_umap_parameters( default_umap_parameters: Optional[Dict[str, Union[int, float]]] ) -> UMAPParameters: if not default_umap_parameters: - default_umap_parameters = dict() + return UMAPParameters() return UMAPParameters( min_dist=float(default_umap_parameters.get("min_dist", DEFAULT_MIN_DIST)), n_neighbors=int(default_umap_parameters.get("n_neighbors", DEFAULT_N_NEIGHBORS)), From 800f85b4d65e575aa6bfe12247d374708e11e613 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:19:51 -0600 Subject: [PATCH 16/20] Update src/phoenix/session/session.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/session/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 983f13d604..303b060c39 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -177,7 +177,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Mapping[str, Any]] = None, host: Optional[str] = None, port: Optional[int] = None, ) -> None: From 7ff011f650a4942894bb6824e061a49b3f54d989 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:19:57 -0600 Subject: [PATCH 17/20] Update src/phoenix/session/session.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/session/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 303b060c39..afa9864413 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -236,7 +236,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Mapping[str, Any]] = None, host: Optional[str] = None, port: Optional[int] = None, ): From 194a1ea9cc185f17f0167ba96175b2da20cd168e Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:20:04 -0600 Subject: [PATCH 18/20] Update src/phoenix/session/session.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/session/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index afa9864413..77fd3e2ff9 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -279,7 +279,7 @@ def launch_app( reference: Optional[Dataset] = None, corpus: Optional[Dataset] = None, trace: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Mapping[str, Any]] = None, host: Optional[str] = None, port: Optional[int] = None, run_in_thread: bool = True, From 4a30132684ecbb1c2e9fca077cef2f1cabfe27da Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:20:44 -0600 Subject: [PATCH 19/20] use positional arguments --- src/phoenix/server/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index 076e13ba04..cf6f651bfb 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -131,7 +131,9 @@ def _get_pid_file() -> Path: traces.put(span) umap_params_list = args.umap_params.split(",") umap_params = UMAPParameters( - float(umap_params_list[0]), int(umap_params_list[1]), int(umap_params_list[2]) + min_dist=float(umap_params_list[0]), + n_neighbors=int(umap_params_list[1]), + n_samples=int(umap_params_list[2]), ) logger.info(f"Server umap params: {umap_params}") app = create_app( From fbf87e4070165fda03ac3239be5e43f838f03ae6 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 4 Oct 2023 19:24:13 -0600 Subject: [PATCH 20/20] fix style and imports --- src/phoenix/pointcloud/umap_parameters.py | 6 ++---- src/phoenix/session/session.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/phoenix/pointcloud/umap_parameters.py b/src/phoenix/pointcloud/umap_parameters.py index c13db8c176..a1f93c972d 100644 --- a/src/phoenix/pointcloud/umap_parameters.py +++ b/src/phoenix/pointcloud/umap_parameters.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, Optional, Union +from typing import Any, Mapping, Optional DEFAULT_MIN_DIST = 0.0 DEFAULT_N_NEIGHBORS = 30 @@ -42,9 +42,7 @@ def __post_init__(self) -> None: ) -def get_umap_parameters( - default_umap_parameters: Optional[Dict[str, Union[int, float]]] -) -> UMAPParameters: +def get_umap_parameters(default_umap_parameters: Optional[Mapping[str, Any]]) -> UMAPParameters: if not default_umap_parameters: return UMAPParameters() return UMAPParameters( diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py index 77fd3e2ff9..4dfd1f0e01 100644 --- a/src/phoenix/session/session.py +++ b/src/phoenix/session/session.py @@ -5,7 +5,15 @@ from datetime import datetime from pathlib import Path from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Union +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + List, + Mapping, + Optional, + Set, +) import pandas as pd @@ -70,7 +78,7 @@ def __init__( reference_dataset: Optional[Dataset] = None, corpus_dataset: Optional[Dataset] = None, trace_dataset: Optional[TraceDataset] = None, - default_umap_parameters: Optional[Dict[str, Union[int, float]]] = None, + default_umap_parameters: Optional[Mapping[str, Any]] = None, host: Optional[str] = None, port: Optional[int] = None, ):