Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Msudhir/add vector update functionality #14

Merged
merged 30 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d372a09
ci: Add bigtable cleanup script
adchia Jul 3, 2023
f6d3caf
fix: Missing Catalog argument in athena connector (#3661)
GyuminJack Jul 3, 2023
d4f9158
ci: Disable flaky lambda materialization test
adchia Jul 3, 2023
4861af0
fix: Broken non-root path with projects-list.json (#3665)
bjfletcher Jul 3, 2023
48e0971
fix: Manage redis pipe's context (#3655)
phil-park Jul 4, 2023
315073f
chore: Bump tough-cookie from 4.0.0 to 4.1.3 in /sdk/python/feast/ui …
dependabot[bot] Jul 11, 2023
870762a
chore: Bump tough-cookie from 4.0.0 to 4.1.3 in /ui (#3676)
dependabot[bot] Jul 11, 2023
478caec
fix: For SQL registry, increase max data_source_name length to 255 (#…
radonnachie Jul 13, 2023
1c01035
fix: Optimize bytes processed when retrieving entity df schema to 0 (…
sudohainguyen Jul 13, 2023
ef4ef32
fix: Entityless fv breaks with `KeyError: __dummy` applying feature_s…
wfoschiera Jul 13, 2023
0ad2d62
chore: Bump protobufjs from 7.1.1 to 7.2.4 in /ui (#3674)
dependabot[bot] Jul 17, 2023
e4c0c9b
chore: Bump protobufjs from 7.1.2 to 7.2.4 in /sdk/python/feast/ui (#…
dependabot[bot] Jul 17, 2023
bef5791
chore: Bump semver from 6.3.0 to 6.3.1 in /ui (#3678)
dependabot[bot] Jul 17, 2023
928be7b
chore: Bump semver from 6.3.0 to 6.3.1 in /sdk/python/feast/ui (#3679)
dependabot[bot] Jul 17, 2023
12f57a9
chore: Bump google.golang.org/grpc from 1.47.0 to 1.53.0 (#3670)
dependabot[bot] Jul 17, 2023
9527183
chore(release): release 0.32.0
feast-ci-bot Jul 17, 2023
76270f6
fix: Redshift push ignores schema (#3671)
metavee Jul 24, 2023
c75a01f
fix: Add aws-sts dependency in java sdk so that S3 client acquires IR…
harmeet-singh-discovery Aug 1, 2023
0578b9b
Adding initial update changes
Aug 7, 2023
8487678
Merge branch 'feast-dev:master' into msudhir/add-vector-update-functi…
Manisha4 Aug 7, 2023
5828891
Added formatting changes
Aug 7, 2023
4a29d33
Revert "Merge branch 'feast-dev:master' into msudhir/add-vector-updat…
Aug 7, 2023
e209770
Added more tests and functionality
Aug 8, 2023
ebe1e32
updating tests
Aug 8, 2023
62692e0
updated functionality and added more tests
Aug 9, 2023
0680c94
correcting a test case
Aug 9, 2023
5c5490d
Making formatting corrections and changeing log
Aug 9, 2023
cdadb87
Improved tests and added functionality to convert feast schema to mil…
Aug 10, 2023
e1fd230
Added PR Review comments
Aug 11, 2023
d0c4269
Fixed failing test
Aug 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions sdk/python/docs/source/feast.protos.feast.core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,22 @@ feast.protos.feast.core.FeatureView\_pb2\_grpc module
:undoc-members:
:show-inheritance:

feast.protos.feast.core.VectorFeatureView\_pb2 module
-----------------------------------------------

.. automodule:: feast.protos.feast.core.VectorFeatureView_pb2
:members:
:undoc-members:
:show-inheritance:

feast.protos.feast.core.VectorFeatureView\_pb2\_grpc module
-----------------------------------------------------

.. automodule:: feast.protos.feast.core.VectorFeatureView_pb2_grpc
:members:
:undoc-members:
:show-inheritance:

feast.protos.feast.core.Feature\_pb2 module
-------------------------------------------

Expand Down
156 changes: 153 additions & 3 deletions sdk/python/feast/expediagroup/vectordb/milvus_online_store.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,37 @@
import logging
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

from pydantic.typing import Literal
from pymilvus import (
Collection,
CollectionSchema,
DataType,
FieldSchema,
connections,
utility,
)

from feast import Entity, RepoConfig
from feast.expediagroup.vectordb.vector_feature_view import VectorFeatureView
from feast.expediagroup.vectordb.vector_online_store import VectorOnlineStore
from feast.field import Field
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import FeastConfigBaseModel
from feast.types import (
Array,
FeastType,
Float32,
Float64,
Int32,
Int64,
Invalid,
String,
)
from feast.usage import log_exceptions_and_usage

logger = logging.getLogger(__name__)


class MilvusOnlineStoreConfig(FeastConfigBaseModel):
Expand All @@ -17,13 +40,47 @@ class MilvusOnlineStoreConfig(FeastConfigBaseModel):
type: Literal["milvus"] = "milvus"
"""Online store type selector"""

alias: str = "default"
""" alias for milvus connection"""

host: str
""" the host URL """

username: str
""" username to connect to Milvus """

password: str
""" password to connect to Milvus """

port: int = 19530
""" the port to connect to a Milvus instance. Should be the one used for GRPC (default: 19530) """


class MilvusConnectionManager:
    """
    Context manager that opens a Milvus connection on entry and closes it on exit.

    Parameters:
        online_config: Configuration object from which ``alias``, ``host``, ``port``,
            ``username`` and ``password`` are read. The caller in this module passes
            ``config.online_store``.
    """

    def __init__(self, online_config: RepoConfig):
        self.online_config = online_config

    def __enter__(self):
        # Connecting to Milvus
        logger.info(
            f"Connecting to Milvus with alias {self.online_config.alias} and host {self.online_config.host} and default port {self.online_config.port}."
        )
        connections.connect(
            # Register the connection under the configured alias so that the
            # disconnect in __exit__ closes the connection opened here.
            alias=self.online_config.alias,
            host=self.online_config.host,
            # Use the configured port instead of silently relying on the client default.
            port=self.online_config.port,
            username=self.online_config.username,
            password=self.online_config.password,
            use_secure=True,
        )
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Report the in-flight exception first so it is not lost if disconnecting fails.
        if exc_type is not None:
            logger.error(f"An exception of type {exc_type} occurred: {exc_value}")
        # Disconnecting from Milvus
        logger.info("Closing the connection to Milvus")
        connections.disconnect(self.online_config.alias)
        logger.info("Connection Closed")
        # Implicitly returns None, so any exception raised inside the with-body propagates.


class MilvusOnlineStore(VectorOnlineStore):
def online_write_batch(
self,
Expand All @@ -49,6 +106,7 @@ def online_read(
"to be implemented in https://jira.expedia.biz/browse/EAPC-7972"
)

@log_exceptions_and_usage(online_store="milvus")
def update(
self,
config: RepoConfig,
Expand All @@ -58,9 +116,39 @@ def update(
entities_to_keep: Sequence[Entity],
partial: bool,
):
raise NotImplementedError(
"to be implemented in https://jira.expedia.biz/browse/EAPC-7970"
)
with MilvusConnectionManager(config.online_store):
for table_to_keep in tables_to_keep:
collection_available = utility.has_collection(table_to_keep.name)
try:
if collection_available:
logger.info(f"Collection {table_to_keep.name} already exists.")
else:
schema = self._convert_featureview_schema_to_milvus_readable(
table_to_keep.schema
)

collection = Collection(name=table_to_keep.name, schema=schema)
logger.info(f"Collection name is {collection.name}")
logger.info(
f"Collection {table_to_keep.name} has been created successfully."
)
except Exception as e:
logger.error(f"Collection update failed due to {e}")

for table_to_delete in tables_to_delete:
collection_available = utility.has_collection(table_to_delete.name)
try:
if collection_available:
utility.drop_collection(table_to_delete.name)
logger.info(
f"Collection {table_to_delete.name} has been deleted successfully."
)
else:
return logger.error(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove return statement

f"Collection {table_to_delete.name} does not exist or is already deleted."
)
except Exception as e:
logger.error(f"Collection deletion failed due to {e}")

def teardown(
self,
Expand All @@ -71,3 +159,65 @@ def teardown(
raise NotImplementedError(
"to be implemented in https://jira.expedia.biz/browse/EAPC-7974"
)

def _convert_featureview_schema_to_milvus_readable(
self, feast_schema: List[Field]
) -> CollectionSchema:
"""
Converting a schema understood by Feast to a schema that is readable by Milvus so that it
can be used when a collection is created in Milvus.

Parameters:
feast_schema (List[Field]): Schema stored in VectorFeatureView.

Returns:
(CollectionSchema): Schema readable by Milvus.

"""
boolean_mapping_from_string = {"True": True, "False": False}
field_list = []

for field in feast_schema:
data_type = self._feast_to_milvus_data_type(field.dtype)
field_name = field.name
description = field.tags.get("description")
is_primary = boolean_mapping_from_string.get(field.tags.get("is_primary"))
dimension = field.tags.get("dimension")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dimensions have to be retrieved from VectorFeatureView. Also, the name of the column holding the vector is defined separately in VectorFeatureView ("vector_field")


if dimension is not None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed?

dimension = int(field.tags.get("dimension"))
# Appending the above converted values to construct a FieldSchema
field_list.append(
FieldSchema(
name=field_name,
dtype=data_type,
description=description,
is_primary=is_primary,
dim=dimension,
)
)
# Returning a CollectionSchema which is a list of type FieldSchema.
return CollectionSchema(field_list)

def _feast_to_milvus_data_type(self, feast_type: FeastType) -> DataType:
    """
    Look up the Milvus data type that corresponds to a Feast type.

    Parameters:
        feast_type (FeastType): Feast type to translate.

    Returns:
        DataType: The Milvus data type mapped to ``feast_type``, or None when the
        Feast type has no entry in the mapping below.
    """
    # TODO: Need to think about list of binaries and list of bytes
    # FeastType.BYTES_LIST: DataType.BINARY_VECTOR
    milvus_type_by_feast_type = {
        Int32: DataType.INT32,
        Int64: DataType.INT64,
        Float32: DataType.FLOAT,
        Float64: DataType.DOUBLE,
        String: DataType.STRING,
        Invalid: DataType.UNKNOWN,
        Array(Float32): DataType.FLOAT_VECTOR,
    }
    return milvus_type_by_feast_type.get(feast_type)
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class VectorFeatureView(BaseFeatureView):

# inheriting from FeatureView wouldn't work due to issue with conflicting proto classes
# therefore using composition instead
name: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feature_view already has an attribute name

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It did not reflect and threw an error when I tried to add name

feature_view: FeatureView
vector_field: str
dimensions: int
Expand Down Expand Up @@ -106,7 +107,7 @@ def __init__(
tags=tags,
owner=owner,
)

self.name = name
self.feature_view = feature_view
self.vector_field = vector_field
self.dimensions = dimensions
Expand Down
4 changes: 4 additions & 0 deletions sdk/python/feast/repo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ def __init__(self, **data: Any):
self._offline_config = "redshift"
elif data["provider"] == "azure":
self._offline_config = "mssql"
elif data["provider"] == "milvus":
self._online_config = "milvus"

self._online_store = None
if "online_store" in data:
Expand All @@ -216,6 +218,8 @@ def __init__(self, **data: Any):
self._online_config = "dynamodb"
elif data["provider"] == "rockset":
self._online_config = "rockset"
elif data["provider"] == "milvus":
self._online_config = "milvus"

self._batch_engine = None
if "batch_engine" in data:
Expand Down
Loading
Loading