Skip to content

Commit

Permalink
Merge branch 'main' into itai/chroma-docs-search
Browse files Browse the repository at this point in the history
  • Loading branch information
itaismith authored Dec 20, 2024
2 parents 497df93 + e98f930 commit 688aa4c
Show file tree
Hide file tree
Showing 144 changed files with 5,181 additions and 5,558 deletions.
46 changes: 43 additions & 3 deletions .github/actions/python/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,59 @@ inputs:
python-version:
description: "Python version to use"
required: false
default: "3.8"
default: "3.9"
runs:
using: "composite"
steps:
- name: Set up Python 3.9 for protos
uses: actions/setup-python@v5
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: "requirements*.txt"
- name: Install proto dependencies
run: |
python -m pip install grpcio==1.58.0 grpcio-tools==1.58.0
shell: bash
- name: Generate Proto Files
if: runner.os != 'Windows'
run: make -C idl proto_python
shell: bash
- name: Generate Proto Files (Windows)
if: runner.os == 'Windows'
run: cd idl && make proto_python
shell: cmd
- name: Uninstall proto dependencies
run: |
python -m pip uninstall -y grpcio grpcio-tools
shell: bash
- name: Set up Python ${{ inputs.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ inputs.python-version }}
cache: "pip"
cache-dependency-path: "requirements*.txt"
- name: Install test dependencies
run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt
- name: Install dependencies
run: |
python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt
shell: bash
- name: Install protobuf compiler (protoc) - Linux
if: runner.os != 'Windows'
run: |
sudo apt-get update
sudo apt-get install -y wget unzip
wget https://github.com/protocolbuffers/protobuf/releases/download/v28.2/protoc-28.2-linux-x86_64.zip
sudo unzip protoc-28.2-linux-x86_64.zip -d /usr/local/
sudo rm protoc-28.2-linux-x86_64.zip
shell: bash
- name: Install protobuf compiler (protoc) - Windows
if: runner.os == 'Windows'
run: |
Invoke-WebRequest -Uri https://github.com/protocolbuffers/protobuf/releases/download/v28.2/protoc-28.2-win64.zip -OutFile protoc.zip
Expand-Archive -Path protoc.zip -DestinationPath C:\protoc
echo "C:\protoc\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
Remove-Item protoc.zip
shell: pwsh
- name: Upgrade SQLite
run: python bin/windows_upgrade_sqlite.py
shell: bash
Expand Down
10 changes: 7 additions & 3 deletions .github/workflows/_python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
python_versions:
description: 'Python versions to test (as json array)'
required: false
default: '["3.8"]'
default: '["3.9"]'
type: string
property_testing_preset:
description: 'Property testing preset'
Expand All @@ -31,6 +31,7 @@ jobs:
"chromadb/test/property/test_embeddings.py",
"chromadb/test/property/test_filtering.py",
"chromadb/test/property/test_persist.py",
"chromadb/test/property/test_sysdb.py",
"chromadb/test/property/test_restart_persist.py"]
include:
- test-globs: "chromadb/test/property/test_embeddings.py"
Expand Down Expand Up @@ -61,11 +62,12 @@ jobs:
"chromadb/test/test_cli.py",
"chromadb/test/auth/test_simple_rbac_authz.py",
"chromadb/test/property/test_collections.py",
"chromadb/test/property/test_collections_with_database_tenant.py",
"chromadb/test/property/test_collections_with_database_tenant.py",
"chromadb/test/property/test_cross_version_persist.py",
"chromadb/test/property/test_embeddings.py",
"chromadb/test/property/test_filtering.py",
"chromadb/test/property/test_persist.py"]
"chromadb/test/property/test_persist.py",
"chromadb/test/property/test_sysdb.py"]
include:
- platform: depot-ubuntu-22.04
env-file: compose-env.linux
Expand All @@ -92,12 +94,14 @@ jobs:
platform: ["depot-ubuntu-22.04-16"]
test-globs: ["chromadb/test/db/test_system.py",
"chromadb/test/api/test_collection.py",
"chromadb/test/api/test_limit_offset.py",
"chromadb/test/property/test_collections.py",
"chromadb/test/property/test_add.py",
"chromadb/test/property/test_filtering.py",
"chromadb/test/property/test_embeddings.py",
"chromadb/test/property/test_collections_with_database_tenant.py",
"chromadb/test/property/test_collections_with_database_tenant_overwrite.py",
"chromadb/test/property/test_sysdb.py",
"chromadb/test/ingest/test_producer_consumer.py",
"chromadb/test/segment/distributed/test_memberlist_provider.py",
"chromadb/test/test_logservice.py",
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ jobs:
uses: actions/checkout@v4
- uses: ./.github/actions/python
with:
python-version: "3.12"
python-version: "3.11"
- name: Setup Rust
uses: ./.github/actions/rust
- name: Run pre-commit
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release-chromadb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.8'
python-version: '3.9'
- name: Install setuptools_scm
run: python -m pip install setuptools_scm
- name: Get Release Version
Expand All @@ -50,7 +50,7 @@ jobs:
python-tests:
uses: ./.github/workflows/_python-tests.yml
with:
python_versions: '["3.8", "3.9", "3.10", "3.11", "3.12"]'
python_versions: '["3.9", "3.10", "3.11", "3.12"]'
property_testing_preset: 'normal'

javascript-client-tests:
Expand Down
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 27 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,34 +1,59 @@
FROM python:3.11-slim-bookworm AS builder
ARG REBUILD_HNSWLIB
ARG PROTOBUF_VERSION=28.2
RUN apt-get update --fix-missing && apt-get install -y --fix-missing \
build-essential \
gcc \
g++ \
cmake \
autoconf && \
autoconf \
python3-dev \
unzip \
curl \
make && \
rm -rf /var/lib/apt/lists/* && \
mkdir /install

# Install specific Protobuf compiler (v28.2)
RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d /usr/local/ && \
rm protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
chmod +x /usr/local/bin/protoc && \
protoc --version # Verify installed version

WORKDIR /install

COPY ./requirements.txt requirements.txt

RUN --mount=type=cache,target=/root/.cache/pip pip install --upgrade --prefix="/install" -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip if [ "$REBUILD_HNSWLIB" = "true" ]; then pip install --no-binary :all: --force-reinstall --prefix="/install" chroma-hnswlib; fi

# Install gRPC tools for Python with fixed version
RUN pip install grpcio==1.58.0 grpcio-tools==1.58.0

# Copy source files to build Protobufs
COPY ./ /chroma

# Generate Protobufs
WORKDIR /chroma
RUN make -C idl proto_python

FROM python:3.11-slim-bookworm AS final

# Create working directory
RUN mkdir /chroma
WORKDIR /chroma

# Copy entrypoint
COPY ./bin/docker_entrypoint.sh /docker_entrypoint.sh

RUN apt-get update --fix-missing && apt-get install -y curl && \
chmod +x /docker_entrypoint.sh && \
rm -rf /var/lib/apt/lists/*

# Copy built dependencies and generated Protobufs
COPY --from=builder /install /usr/local
COPY ./ /chroma
COPY --from=builder /chroma /chroma

ENV CHROMA_HOST_ADDR="0.0.0.0"
ENV CHROMA_HOST_PORT=8000
Expand Down
2 changes: 1 addition & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,4 @@ k8s_resource('prometheus', resource_deps=['k8s_setup'], labels=["observability"]
k8s_resource('otel-collector', resource_deps=['k8s_setup'], labels=["observability"])

# Local S3
k8s_resource('minio-deployment', resource_deps=['k8s_setup'], labels=["debug"], port_forwards='9000:9000')
k8s_resource('minio-deployment', resource_deps=['k8s_setup'], labels=["debug"], port_forwards=['9000:9000', '9005:9005'])
2 changes: 1 addition & 1 deletion chromadb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@

__settings = Settings()

__version__ = "0.5.23"
__version__ = "0.6.0"


# Workaround to deal with Colab's old sqlite3 version
Expand Down
34 changes: 24 additions & 10 deletions chromadb/api/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def _get(
}
)

coll = self._get_collection(collection_id)
scan = self._scan(collection_id)

# TODO: Replace with unified validation
if where is not None:
Expand Down Expand Up @@ -619,7 +619,7 @@ def _get(

return self._executor.get(
GetPlan(
Scan(coll),
scan,
Filter(ids, where, where_document),
Limit(offset or 0, limit),
Projection(
Expand Down Expand Up @@ -676,7 +676,7 @@ def _delete(
"""
)

coll = self._get_collection(collection_id)
scan = self._scan(collection_id)

self._quota_enforcer.enforce(
action=Action.DELETE,
Expand All @@ -690,7 +690,7 @@ def _delete(

if (where or where_document) or not ids:
ids_to_delete = self._executor.get(
GetPlan(Scan(coll), Filter(ids, where, where_document))
GetPlan(scan, Filter(ids, where, where_document))
)["ids"]
else:
ids_to_delete = ids
Expand All @@ -701,7 +701,7 @@ def _delete(
records_to_submit = list(
_records(operation=t.Operation.DELETE, ids=ids_to_delete)
)
self._validate_embedding_record_set(coll, records_to_submit)
self._validate_embedding_record_set(scan.collection, records_to_submit)
self._producer.submit_embeddings(collection_id, records_to_submit)

self._product_telemetry_client.capture(
Expand All @@ -726,8 +726,7 @@ def _count(
database: str = DEFAULT_DATABASE,
) -> int:
add_attributes_to_current_span({"collection_id": str(collection_id)})
coll = self._get_collection(collection_id)
return self._executor.count(CountPlan(Scan(coll)))
return self._executor.count(CountPlan(self._scan(collection_id)))

@trace_method("SegmentAPI._query", OpenTelemetryGranularity.OPERATION)
# We retry on version mismatch errors because the version of the collection
Expand Down Expand Up @@ -785,9 +784,9 @@ def _query(
if where_document is not None:
validate_where_document(where_document)

coll = self._get_collection(collection_id)
scan = self._scan(collection_id)
for embedding in query_embeddings:
self._validate_dimension(coll, len(embedding), update=False)
self._validate_dimension(scan.collection, len(embedding), update=False)

self._quota_enforcer.enforce(
action=Action.QUERY,
Expand All @@ -800,7 +799,7 @@ def _query(

return self._executor.knn(
KNNPlan(
Scan(coll),
scan,
KNN(query_embeddings, n_results),
Filter(None, where, where_document),
Projection(
Expand Down Expand Up @@ -893,6 +892,21 @@ def _get_collection(self, collection_id: UUID) -> t.Collection:
)
return collections[0]

@trace_method("SegmentAPI._scan", OpenTelemetryGranularity.ALL)
def _scan(self, collection_id: UUID) -> Scan:
    """Build the ``Scan`` operator for a collection from one sysdb lookup.

    The collection and its segments are fetched together through
    ``self._sysdb.get_collection_with_segments`` and the segments are then
    indexed by their ``"scope"`` entry.

    Args:
        collection_id: ID of the collection to scan.

    Returns:
        A ``Scan`` carrying the collection plus its vector (``knn``),
        metadata and, when present, record segments.

    Raises:
        KeyError: If the fetched segments contain no vector-scoped or no
            metadata-scoped segment.
    """
    fetched = self._sysdb.get_collection_with_segments(collection_id)

    # For now a collection should have exactly one segment per scope:
    # - Local scopes: vector, metadata
    # - Distributed scopes: vector, metadata, record
    # Should a scope ever appear more than once, the last segment wins.
    segments = fetched["segments"]
    by_scope = {seg["scope"]: seg for seg in segments}

    collection = fetched["collection"]
    vector_segment = by_scope[t.SegmentScope.VECTOR]
    metadata_segment = by_scope[t.SegmentScope.METADATA]
    # Local Chroma does not have a record segment, and the local executor
    # does not use it, so a missing record scope is passed through as None.
    record_segment = by_scope.get(t.SegmentScope.RECORD)

    return Scan(
        collection=collection,
        knn=vector_segment,
        metadata=metadata_segment,
        record=record_segment,  # type: ignore[arg-type]
    )


def _records(
operation: t.Operation,
Expand Down
20 changes: 20 additions & 0 deletions chromadb/db/impl/grpc/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
DeleteSegmentRequest,
GetCollectionsRequest,
GetCollectionsResponse,
GetCollectionWithSegmentsRequest,
GetCollectionWithSegmentsResponse,
GetDatabaseRequest,
GetSegmentsRequest,
GetTenantRequest,
Expand All @@ -33,6 +35,7 @@
from chromadb.telemetry.opentelemetry.grpc import OtelInterceptor
from chromadb.types import (
Collection,
CollectionAndSegments,
Database,
Metadata,
OptionalArgument,
Expand Down Expand Up @@ -363,6 +366,23 @@ def get_collections(
)
raise InternalError()

@overrides
def get_collection_with_segments(self, collection_id: UUID) -> CollectionAndSegments:
    """Fetch a collection together with its segments in one sysdb RPC.

    Args:
        collection_id: ID of the collection to look up; sent to the
            service as its hex string.

    Returns:
        A ``CollectionAndSegments`` holding the collection and the list of
        segments converted from the response.

    Raises:
        NotFoundError: If the RPC fails with status ``NOT_FOUND``.
        InternalError: If the RPC fails with any other status; the
            underlying error is logged first.
    """
    try:
        rpc_request = GetCollectionWithSegmentsRequest(id=collection_id.hex)
        rpc_response: GetCollectionWithSegmentsResponse = (
            self._sys_db_stub.GetCollectionWithSegments(rpc_request)
        )
        collection = from_proto_collection(rpc_response.collection)
        segments = [
            from_proto_segment(proto_segment)
            for proto_segment in rpc_response.segments
        ]
        return CollectionAndSegments(collection=collection, segments=segments)
    except grpc.RpcError as e:
        # Only NOT_FOUND maps to a dedicated error; every other failure is
        # logged and reported as an internal error.
        if e.code() != grpc.StatusCode.NOT_FOUND:
            logger.error(
                f"Failed to get collection {collection_id} and its segments due to error: {e}"
            )
            raise InternalError()
        raise NotFoundError()

@overrides
def update_collection(
self,
Expand Down
Loading

0 comments on commit 688aa4c

Please sign in to comment.