From 86b096a0670e77d7a8715d3eb62e71dcd954eb49 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Wed, 17 Jul 2024 13:06:05 -0400 Subject: [PATCH 01/16] add docstrings --- samples/index_tuning_sample/index_search.py | 6 ++++++ src/langchain_google_alloydb_pg/engine.py | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/samples/index_tuning_sample/index_search.py b/samples/index_tuning_sample/index_search.py index cb92c219..a7dbecf4 100644 --- a/samples/index_tuning_sample/index_search.py +++ b/samples/index_tuning_sample/index_search.py @@ -51,6 +51,7 @@ async def get_vector_store(): + """Get vector store instance.""" engine = await AlloyDBEngine.afrom_instance( project_id=PROJECT_ID, region=REGION, @@ -71,6 +72,7 @@ async def get_vector_store(): async def query_vector_with_timing(vector_store, query): + """Query the vector store and measure the query latency.""" start_time = time.monotonic() # timer starts docs = await vector_store.asimilarity_search(k=k, query=query) end_time = time.monotonic() # timer ends @@ -79,6 +81,7 @@ async def query_vector_with_timing(vector_store, query): async def hnsw_search(vector_store, knn_docs): + """Create an HNSW index and perform similarity search with the index.""" hnsw_index = HNSWIndex(name="hnsw", m=36, ef_construction=96) await vector_store.aapply_vector_index(hnsw_index) assert await vector_store.is_valid_index(hnsw_index.name) @@ -99,6 +102,7 @@ async def hnsw_search(vector_store, knn_docs): async def ivfflat_search(vector_store, knn_docs): + """Create an IVFFlat index and perform similarity search with the index.""" ivfflat_index = IVFFlatIndex(name="ivfflat") await vector_store.aapply_vector_index(ivfflat_index) assert await vector_store.is_valid_index(ivfflat_index.name) @@ -119,6 +123,7 @@ async def ivfflat_search(vector_store, knn_docs): async def knn_search(vector_store): + """Perform similarity search without an index.""" latencies = [] knn_docs = [] for query in queries: @@ -130,6 +135,7 @@ async def knn_search(vector_store): def calculate_recall(base, target): + """Calculate recall on the target result.""" # size of intersection / total number of times base = {doc.page_content for doc in base} target = {doc.page_content for doc in target} diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 8d9373b4..f262c780 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -131,6 +131,7 @@ def from_instance( user: Optional[str] = None, password: Optional[str] = None, ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, + service_account_email: Optional[str] = None, ) -> AlloyDBEngine: # Running a loop in a background thread allows us to support # async methods from non-async environments @@ -148,6 +149,7 @@ def from_instance( password, loop=loop, thread=thread, + service_account_email=service_account_email, ) return asyncio.run_coroutine_threadsafe(coro, loop).result() @@ -164,6 +166,7 @@ async def _create( password: Optional[str] = None, loop: Optional[asyncio.AbstractEventLoop] = None, thread: Optional[Thread] = None, + service_account_email: Optional[str] = None, ) -> AlloyDBEngine: # error if only one of user or password is set, must be both or neither if bool(user) ^ bool(password): raise ValueError( @@ -184,12 +187,14 @@ async def _create( db_user = user # otherwise use automatic IAM database authentication else: - # get application default credentials - credentials, _ = google.auth.default( - scopes=["https://www.googleapis.com/auth/userinfo.email"] - ) -
db_user = await _get_iam_principal_email(credentials) - enable_iam_auth = True + if service_account_email: + db_user = service_account_email + else: + # get application default credentials + credentials, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/userinfo.email"] + ) + db_user = await _get_iam_principal_email(credentials) # anonymous function to be used for SQLAlchemy 'creator' argument async def getconn() -> asyncpg.Connection: @@ -221,6 +226,7 @@ async def afrom_instance( user: Optional[str] = None, password: Optional[str] = None, ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, + service_account_email: Optional[str] = None, ) -> AlloyDBEngine: return await cls._create( project_id, @@ -231,6 +237,7 @@ async def afrom_instance( ip_type, user, password, + service_account_email=service_account_email, ) @classmethod From 44f57655db3373d7532e344343b18a4ef0988477 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Wed, 17 Jul 2024 17:29:17 -0400 Subject: [PATCH 02/16] added more docstrings --- .../chat_message_history.py | 48 +++++++- src/langchain_google_alloydb_pg/engine.py | 105 +++++++++++++++++- 2 files changed, 150 insertions(+), 3 deletions(-) diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py index a81566f9..34d5ae34 100644 --- a/src/langchain_google_alloydb_pg/chat_message_history.py +++ b/src/langchain_google_alloydb_pg/chat_message_history.py @@ -51,6 +51,18 @@ def __init__( table_name: str, messages: List[BaseMessage], ): + """_summary_ + + Args: + key (object): _description_ + engine (AlloyDBEngine): AlloyDB engine to use. + session_id (str): Retrieve the table content with this session ID. + table_name (str): Table name that stores the chat message history. + messages (List[BaseMessage]): _description_ + + Raises: + Exception: _description_ + """ if key != AlloyDBChatMessageHistory.__create_key: raise Exception( "Only create class through 'create' or 'create_sync' methods!" @@ -67,6 +79,19 @@ async def create( session_id: str, table_name: str, ) -> AlloyDBChatMessageHistory: + """Create a new AlloyDBChatMessageHistory instance. + + Args: + engine (AlloyDBEngine): AlloyDB engine to use. + session_id (str): Retrieve the table content with this session ID. + table_name (str): Table name that stores the chat message history. + + Raises: + IndexError: Raises error if the table provided does not contain required schema. + + Returns: + AlloyDBChatMessageHistory: A newly created instance of AlloyDBChatMessageHistory. + """ table_schema = await engine._aload_table_schema(table_name) column_names = table_schema.columns.keys() @@ -94,11 +119,24 @@ def create_sync( session_id: str, table_name: str, ) -> AlloyDBChatMessageHistory: + """Create a new AlloyDBChatMessageHistory instance. + + Args: + engine (AlloyDBEngine): AlloyDB engine to use. + session_id (str): Retrieve the table content with this session ID. + table_name (str): Table name that stores the chat message history. + + Raises: + IndexError: Raises error if the table provided does not contain required schema. + + Returns: + AlloyDBChatMessageHistory: A newly created instance of AlloyDBChatMessageHistory. 
+ """ coro = cls.create(engine, session_id, table_name) return engine._run_as_sync(coro) async def aadd_message(self, message: BaseMessage) -> None: - """Append the message to the record in AlloyDB""" + """Append the message to the record in AlloyDB.""" query = f"""INSERT INTO "{self.table_name}"(session_id, data, type) VALUES (:session_id, :data, :type); """ @@ -115,28 +153,34 @@ async def aadd_message(self, message: BaseMessage) -> None: ) def add_message(self, message: BaseMessage) -> None: + """Append the message to the record in AlloyDB.""" self.engine._run_as_sync(self.aadd_message(message)) async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: + """Append a list of messages to the record in AlloyDB.""" for message in messages: await self.aadd_message(message) def add_messages(self, messages: Sequence[BaseMessage]) -> None: + """Append a list of messages to the record in AlloyDB.""" self.engine._run_as_sync(self.aadd_messages(messages)) async def aclear(self) -> None: - """Clear session memory from AlloyDB""" + """Clear session memory from AlloyDB.""" query = f"""DELETE FROM "{self.table_name}" WHERE session_id = :session_id;""" await self.engine._aexecute(query, {"session_id": self.session_id}) self.messages = [] def clear(self) -> None: + """Clear session memory from AlloyDB.""" self.engine._run_as_sync(self.aclear()) async def async_messages(self) -> None: + """Retrieve the messages from AlloyDB.""" self.messages = await _aget_messages( self.engine, self.session_id, self.table_name ) def sync_messages(self) -> None: + """Retrieve the messages from AlloyDB.""" self.engine._run_as_sync(self.async_messages()) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index f262c780..06900d6f 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -92,6 +92,13 @@ class Column: nullable: bool = True def __post_init__(self) -> None: + """Check if initialization parameters are valid. + + Raises: + ValueError: Raises error if Column name is not string. + ValueError: Raises error if data_type is not type string. + """ + if not isinstance(self.name, str): raise ValueError("Column name must be type string") if not isinstance(self.data_type, str): @@ -133,6 +140,22 @@ def from_instance( ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, service_account_email: Optional[str] = None, ) -> AlloyDBEngine: + """Create an AlloyDBEngine from an AlloyDB instance. + + Args: + project_id (str): GCP project ID. + region (str): Cloud AlloyDB instance region. + cluster (str): Cloud AlloyDB cluster name. + instance (str): Cloud AlloyDB instance name. + database (str): Database name. + user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. + password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. + ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. + service_account_email (Optional[str], optional): Service account email. Defaults to None. + + Returns: + AlloyDBEngine: A newly created AlloyDBEngine instance. + """ # Running a loop in a background thread allows us to support # async methods from non-async environments loop = asyncio.new_event_loop() @@ -168,6 +191,25 @@ async def _create( thread: Optional[Thread] = None, service_account_email: Optional[str] = None, ) -> AlloyDBEngine: + """Create an AlloyDBEngine from an AlloyDB instance. + + Args: + project_id (str): GCP project ID. 
+ region (str): Cloud AlloyDB instance region. + cluster (str): Cloud AlloyDB cluster name. + instance (str): Cloud AlloyDB instance name. + database (str): Database name. + user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. + password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. + ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. + service_account_email (Optional[str], optional): Service account email. Defaults to None. + + Raises: + ValueError: Raises error if only one of 'user' or 'password' is specified. + + Returns: + AlloyDBEngine: A newly created AlloyDBEngine instance. + """ # error if only one of user or password is set, must be both or neither if bool(user) ^ bool(password): raise ValueError( @@ -228,6 +270,22 @@ async def afrom_instance( ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, service_account_email: Optional[str] = None, ) -> AlloyDBEngine: + """_summary_ + + Args: + project_id (str): GCP project ID. + region (str): Cloud AlloyDB instance region. + cluster (str): Cloud AlloyDB cluster name. + instance (str): Cloud AlloyDB instance name. + database (str): Cloud AlloyDB database name. + user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. + password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. + ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. + service_account_email (Optional[str], optional): Service account email. Defaults to None. + + Returns: + AlloyDBEngine: A newly created AlloyDBEngine instance. + """ return await cls._create( project_id, region, @@ -242,6 +300,7 @@ async def afrom_instance( @classmethod def from_engine(cls: Type[AlloyDBEngine], engine: AsyncEngine) -> AlloyDBEngine: + """Create an AlloyDBEngine instance from engine.""" return cls(cls.__create_key, engine, None, None) async def _aexecute(self, query: str, params: Optional[dict] = None) -> None: @@ -259,8 +318,8 @@ async def _aexecute_outside_tx(self, query: str) -> None: async def _afetch( self, query: str, params: Optional[dict] = None ) -> Sequence[RowMapping]: + """Fetch results froma SQL query.""" async with self._engine.connect() as conn: - """Fetch results from a SQL query.""" result = await conn.execute(text(query), params) result_map = result.mappings() result_fetch = result_map.fetchall() @@ -268,12 +327,15 @@ async def _afetch( return result_fetch def _execute(self, query: str, params: Optional[dict] = None) -> None: + """Execute a SQL query.""" return self._run_as_sync(self._aexecute(query, params)) def _fetch(self, query: str, params: Optional[dict] = None) -> Sequence[RowMapping]: + """Fetch results froma SQL query.""" return self._run_as_sync(self._afetch(query, params)) def _run_as_sync(self, coro: Awaitable[T]) -> T: + """Run an async coroutine synchronously""" if not self._loop: raise Exception("Engine was initialized async.") return asyncio.run_coroutine_threadsafe(coro, self._loop).result() @@ -344,6 +406,30 @@ def init_vectorstore_table( overwrite_existing: bool = False, store_metadata: bool = True, ) -> None: + """ + Create a table for saving of vectors to be used with Alloy DB. + If table already exists and overwrite flag is not set, a TABLE_ALREADY_EXISTS error is thrown. + + Args: + table_name (str): The table name. + vector_size (int): Vector size for the embedding model to be used. + content_column (str): Name of the column to store document content. + Default: "page_content". 
+ embedding_column (str) : Name of the column to store vector embeddings. + Default: "embedding". + metadata_columns (List[Column]): A list of Columns to create for custom + metadata. Default: []. Optional. + metadata_json_column (str): The column to store extra metadata in JSON format. + Default: "langchain_metadata". Optional. + id_column (str): Name of the column to store ids. + Default: "langchain_id". Optional, + overwrite_existing (bool): Whether to drop the existing table before insertion. + Default: False. + store_metadata (bool): Whether to store metadata in a JSON column if not specified by `metadata_columns`. + Default: True. + Raises: + :class:`DuplicateTableError `: if table already exists. + """ return self._run_as_sync( self.ainit_vectorstore_table( table_name, @@ -359,6 +445,7 @@ def init_vectorstore_table( ) async def ainit_chat_history_table(self, table_name: str) -> None: + """Create a new chat history table.""" create_table_query = f"""CREATE TABLE IF NOT EXISTS "{table_name}"( id SERIAL PRIMARY KEY, session_id TEXT NOT NULL, @@ -368,6 +455,7 @@ async def ainit_chat_history_table(self, table_name: str) -> None: await self._aexecute(create_table_query) def init_chat_history_table(self, table_name: str) -> None: + """Create a new chat history table.""" return self._run_as_sync( self.ainit_chat_history_table( table_name, @@ -418,6 +506,21 @@ def init_document_table( metadata_json_column: str = "langchain_metadata", store_metadata: bool = True, ) -> None: + """ + Create a table for saving of langchain documents. + If table already exists, a DuplicateTableError error is thrown. + + Args: + table_name (str): The PgSQL database table name. + content_column (str): Name of the column to store document content. + Default: "page_content". + metadata_columns (List[Column]): A list of Columns + to create for custom metadata. Optional. + metadata_json_column (str): The column to store extra metadata in JSON format. + Default: "langchain_metadata". Optional. + store_metadata (bool): Whether to store extra metadata in a metadata column + if not described in 'metadata' field list (Default: True). + """ return self._run_as_sync( self.ainit_document_table( table_name, From 2c7114c149fc292a3c1902db193e24fdfbfeecf2 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 12:37:26 -0400 Subject: [PATCH 03/16] add more docstring --- .../chat_message_history.py | 10 +- src/langchain_google_alloydb_pg/engine.py | 6 +- src/langchain_google_alloydb_pg/indexes.py | 9 ++ src/langchain_google_alloydb_pg/loader.py | 88 ++++++++++- .../vectorstore.py | 148 +++++++++++++++++- 5 files changed, 246 insertions(+), 15 deletions(-) diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py index 34d5ae34..c5ce0a5f 100644 --- a/src/langchain_google_alloydb_pg/chat_message_history.py +++ b/src/langchain_google_alloydb_pg/chat_message_history.py @@ -54,14 +54,14 @@ def __init__( """_summary_ Args: - key (object): _description_ + key (object): Key to prevent direct constructor usage. engine (AlloyDBEngine): AlloyDB engine to use. session_id (str): Retrieve the table content with this session ID. table_name (str): Table name that stores the chat message history. - messages (List[BaseMessage]): _description_ + messages (List[BaseMessage]): Messages to store. Raises: - Exception: _description_ + Exception: If constructor is directly called by the user. 
""" if key != AlloyDBChatMessageHistory.__create_key: raise Exception( @@ -87,7 +87,7 @@ async def create( table_name (str): Table name that stores the chat message history. Raises: - IndexError: Raises error if the table provided does not contain required schema. + IndexError: If the table provided does not contain required schema. Returns: AlloyDBChatMessageHistory: A newly created instance of AlloyDBChatMessageHistory. @@ -127,7 +127,7 @@ def create_sync( table_name (str): Table name that stores the chat message history. Raises: - IndexError: Raises error if the table provided does not contain required schema. + IndexError: If the table provided does not contain required schema. Returns: AlloyDBChatMessageHistory: A newly created instance of AlloyDBChatMessageHistory. diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 06900d6f..c54edc6b 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -151,7 +151,7 @@ def from_instance( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): Service account email. Defaults to None. + service_account_email (Optional[str], optional): IAM service account email. Defaults to None. Returns: AlloyDBEngine: A newly created AlloyDBEngine instance. @@ -202,7 +202,7 @@ async def _create( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): Service account email. Defaults to None. + service_account_email (Optional[str], optional): IAM service account email. Raises: ValueError: Raises error if only one of 'user' or 'password' is specified. @@ -281,7 +281,7 @@ async def afrom_instance( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): Service account email. Defaults to None. + service_account_email (Optional[str], optional): IAM service account email. Defaults to None. Returns: AlloyDBEngine: A newly created AlloyDBEngine instance. 
diff --git a/src/langchain_google_alloydb_pg/indexes.py b/src/langchain_google_alloydb_pg/indexes.py index bc8357ca..4ca8c767 100644 --- a/src/langchain_google_alloydb_pg/indexes.py +++ b/src/langchain_google_alloydb_pg/indexes.py @@ -49,6 +49,7 @@ class BaseIndex(ABC): @abstractmethod def index_options(self) -> str: + """Set index query options for vector store initialization.""" raise NotImplementedError( "index_options method must be implemented by subclass" ) @@ -66,6 +67,7 @@ class HNSWIndex(BaseIndex): ef_construction: int = 64 def index_options(self) -> str: + """Set index query options for vector store initialization.""" return f"(m = {self.m}, ef_construction = {self.ef_construction})" @@ -73,6 +75,7 @@ def index_options(self) -> str: class QueryOptions(ABC): @abstractmethod def to_string(self) -> str: + """Convert index attributes to string.""" raise NotImplementedError("to_string method must be implemented by subclass") @@ -81,6 +84,7 @@ class HNSWQueryOptions(QueryOptions): ef_search: int = 40 def to_string(self) -> str: + """Convert index attributes to string.""" return f"hnsw.ef_search = {self.ef_search}" @@ -90,6 +94,7 @@ class IVFFlatIndex(BaseIndex): lists: int = 100 def index_options(self) -> str: + """Set index query options for vector store initialization.""" return f"(lists = {self.lists})" @@ -98,6 +103,7 @@ class IVFFlatQueryOptions(QueryOptions): probes: int = 1 def to_string(self) -> str: + """Convert index attributes to string.""" return f"ivfflat.probes = {self.probes}" @@ -110,6 +116,7 @@ class IVFIndex(BaseIndex): ) # Disable `quantizer` initialization currently only supports the value "sq8" def index_options(self) -> str: + """Set index query options for vector store initialization.""" return f"(lists = {self.lists}, quantizer = {self.quantizer})" @@ -118,6 +125,7 @@ class IVFQueryOptions(QueryOptions): probes: int = 1 def to_string(self) -> str: + """Convert index attributes to string.""" return f"ivf.probes = {self.probes}" @@ -130,4 +138,5 @@ class ScaNNIndex(BaseIndex): ) # Excludes `quantizer` from initialization currently only supports the value "sq8" def index_options(self) -> str: + """Set index query options for vector store initialization.""" return f"(num_leaves = {self.num_leaves}, quantizer = {self.quantizer})" diff --git a/src/langchain_google_alloydb_pg/loader.py b/src/langchain_google_alloydb_pg/loader.py index 90f5ab91..91733b3b 100644 --- a/src/langchain_google_alloydb_pg/loader.py +++ b/src/langchain_google_alloydb_pg/loader.py @@ -38,20 +38,24 @@ def text_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: + """txt document formatter.""" return " ".join(str(row[column]) for column in content_columns if column in row) def csv_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: + """CSV documennt formatter.""" return ", ".join(str(row[column]) for column in content_columns if column in row) def yaml_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: + """YAML documennt formatter.""" return "\n".join( f"{column}: {str(row[column])}" for column in content_columns if column in row ) def json_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: + """JSON documennt formatter.""" dictionary: Dict[str, Any] = {} for column in content_columns: if column in row: @@ -66,6 +70,7 @@ def _parse_doc_from_row( metadata_json_column: Optional[str] = DEFAULT_METADATA_COL, formatter: Callable[[Dict[str, Any], Iterable[str]], str] = text_formatter, ) -> Document: + """Parse row into 
document.""" page_content = formatter(row, content_columns) metadata: Dict[str, Any] = {} # unnest metadata from langchain_metadata column @@ -86,6 +91,7 @@ def _parse_row_from_doc( content_column: str = DEFAULT_CONTENT_COL, metadata_json_column: Optional[str] = DEFAULT_METADATA_COL, ) -> Dict[str, Any]: + """Parse document into a dictionary of rows.""" doc_metadata = doc.metadata.copy() row: Dict[str, Any] = {content_column: doc.page_content} for entry in doc.metadata: @@ -119,6 +125,22 @@ def __init__( formatter: Callable[[Dict[str, Any], Iterable[str]], str], metadata_json_column: Optional[str] = None, ) -> None: + """AlloyDBLoader initialization. + + Args: + key (object): Prevent direct constructor usage. + engine (AlloyDBEngine): AsyncEngine with pool connection to the postgres database + query (Optional[str], optional): SQL query. Defaults to None. + table_name (Optional[str], optional): Name of table to query. Defaults to None. + content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. + metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + format (Optional[str], optional): Format of page content (OneOf: text, csv, YAML, JSON). Defaults to 'text'. + formatter (Optional[Callable], optional): A function to format page content (OneOf: format, formatter). Defaults to None. + + Raises: + Excaption: + """ if key != AlloyDBLoader.__create_key: raise Exception( "Only create class through 'create' or 'create_sync' methods!" @@ -143,7 +165,7 @@ async def create( format: Optional[str] = None, formatter: Optional[Callable] = None, ) -> AlloyDBLoader: - """Constructor for AlloyDBLoader + """Create an AlloyDBLoader instance. Args: engine (AlloyDBEngine):AsyncEngine with pool connection to the postgres database @@ -155,8 +177,8 @@ async def create( format (Optional[str], optional): Format of page content (OneOf: text, csv, YAML, JSON). Defaults to 'text'. formatter (Optional[Callable], optional): A function to format page content (OneOf: format, formatter). Defaults to None. - Returns: - AlloyDBLoader + Raises: + Exception: if called directly by user. """ if table_name and query: raise ValueError("Only one of 'table_name' or 'query' should be specified.") @@ -236,6 +258,21 @@ def create_sync( format: Optional[str] = None, formatter: Optional[Callable] = None, ) -> AlloyDBLoader: + """Create an AlloyDBLoader instance. + + Args: + engine (AlloyDBEngine):AsyncEngine with pool connection to the postgres database + query (Optional[str], optional): SQL query. Defaults to None. + table_name (Optional[str], optional): Name of table to query. Defaults to None. + content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. + metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + format (Optional[str], optional): Format of page content (OneOf: text, csv, YAML, JSON). Defaults to 'text'. + formatter (Optional[Callable], optional): A function to format page content (OneOf: format, formatter). Defaults to None. 
+ + Returns: + AlloyDBLoader + """ coro = cls.create( engine, query, @@ -251,6 +288,7 @@ def create_sync( async def _collect_async_items( self, docs_generator: AsyncIterator[Document] ) -> List[Document]: + """Exhaust the document generator into a list.""" return [doc async for doc in docs_generator] def load(self) -> List[Document]: @@ -313,6 +351,19 @@ def __init__( metadata_columns: List[str] = [], metadata_json_column: Optional[str] = None, ) -> None: + """Create an AlloyDBDocumentSaver instance. + + Args: + key (object): Prevent direct constructor usage. + engine (AlloyDBEngine): AsyncEngine with pool connection to the postgres database + table_name (Optional[str], optional): Name of table to query. Defaults to None. + content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. + metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Raises: + Exception: if called directly by user. + """ if key != AlloyDBDocumentSaver.__create_key: raise Exception( "Only create class through 'create' or 'create_sync' methods!" @@ -332,6 +383,18 @@ async def create( metadata_columns: List[str] = [], metadata_json_column: Optional[str] = DEFAULT_METADATA_COL, ) -> AlloyDBDocumentSaver: + """Create an AlloyDBDocumentSaver instance. + + Args: + engine (AlloyDBEngine):AsyncEngine with pool connection to the postgres database + table_name (Optional[str], optional): Name of table to query. Defaults to None. + content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. + metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Returns: + AlloyDBDocumentSaver + """ table_schema = await engine._aload_table_schema(table_name) column_names = table_schema.columns.keys() if content_column not in column_names: @@ -377,6 +440,18 @@ def create_sync( metadata_columns: List[str] = [], metadata_json_column: str = DEFAULT_METADATA_COL, ) -> AlloyDBDocumentSaver: + """Create an AlloyDBDocumentSaver instance. + + Args: + engine (AlloyDBEngine):AsyncEngine with pool connection to the postgres database + table_name (Optional[str], optional): Name of table to query. Defaults to None. + content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. + metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Returns: + AlloyDBDocumentSaver + """ coro = cls.create( engine, table_name, @@ -429,6 +504,13 @@ async def aadd_documents(self, docs: List[Document]) -> None: await self.engine._aexecute(query, row) def add_documents(self, docs: List[Document]) -> None: + """ + Save documents in the DocumentSaver table. Document’s metadata is added to columns if found or + stored in langchain_metadata JSON column. + + Args: + docs (List[langchain_core.documents.Document]): a list of documents to be saved.
+ """ self.engine._run_as_sync(self.aadd_documents(docs)) async def adelete(self, docs: List[Document]) -> None: diff --git a/src/langchain_google_alloydb_pg/vectorstore.py b/src/langchain_google_alloydb_pg/vectorstore.py index 5d63fa43..495699ca 100644 --- a/src/langchain_google_alloydb_pg/vectorstore.py +++ b/src/langchain_google_alloydb_pg/vectorstore.py @@ -59,6 +59,28 @@ def __init__( lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, ): + """Constructor for AlloyDBVectorStore. + Args: + key (object): Prevent direct constructor usage. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. + table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. + k (int): Number of Documents to return from search. Defaults to 4. + fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. + lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. + index_query_options (QueryOptions): Index query option. + + + Raises: + Exception: If called directly by user. + """ if key != AlloyDBVectorStore.__create_key: raise Exception( "Only create class through 'create' or 'create_sync' methods!" @@ -96,8 +118,9 @@ async def create( lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, ) -> AlloyDBVectorStore: - """Constructor for AlloyDBVectorStore. + """Create an AlloyDBVectorStore instance. Args: + key (object): Prevent direct constructor usage. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. @@ -107,6 +130,14 @@ async def create( ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. id_column (str): Column that represents the Document's id. Defaults to "langchain_id". metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. + k (int): Number of Documents to return from search. Defaults to 4. + fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. + lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. 
+ index_query_options (QueryOptions): Index query option. + + Returns: + AlloyDBVectorStore """ if metadata_columns and ignore_metadata_columns: raise ValueError( @@ -191,6 +222,27 @@ def create_sync( lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, ) -> AlloyDBVectorStore: + """Create an AlloyDBVectorStore instance. + Args: + key (object): Prevent direct constructor usage. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. + table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. + k (int): Number of Documents to return from search. Defaults to 4. + fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. + lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. + index_query_options (QueryOptions): Index query option. 
+ + Returns: + AlloyDBVectorStore + """ coro = cls.create( engine, embedding_service, @@ -221,6 +273,7 @@ async def _aadd_embeddings( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: + """Add embeddings to the table.""" if not ids: ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: @@ -268,6 +321,7 @@ async def aadd_texts( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: + """Embed texts and add to the table.""" embeddings = self.embedding_service.embed_documents(list(texts)) ids = await self._aadd_embeddings( texts, embeddings, metadatas=metadatas, ids=ids, **kwargs @@ -280,6 +334,7 @@ async def aadd_documents( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: + """Embed documents and add to the table""" texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] ids = await self.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) @@ -292,6 +347,7 @@ def add_texts( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: + """Embed texts and add to the table.""" return self.engine._run_as_sync( self.aadd_texts(texts, metadatas, ids, **kwargs) ) @@ -302,6 +358,7 @@ def add_documents( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: + """Embed documents and add to the table""" return self.engine._run_as_sync(self.aadd_documents(documents, ids, **kwargs)) async def adelete( @@ -309,6 +366,7 @@ async def adelete( ids: Optional[List[str]] = None, **kwargs: Any, ) -> Optional[bool]: + """Delete records from the table""" if not ids: return False @@ -322,6 +380,7 @@ def delete( ids: Optional[List[str]] = None, **kwargs: Any, ) -> Optional[bool]: + """Delete records from the table""" return self.engine._run_as_sync(self.adelete(ids, **kwargs)) @classmethod @@ -341,6 +400,22 @@ async def afrom_texts( # type: ignore[override] metadata_json_column: str = "langchain_metadata", **kwargs: Any, ) -> AlloyDBVectorStore: + """Create an AlloyDBVectorStore instance from texts. + Args: + texts (List[str]): Texts to add to the vector store. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. + table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Returns: + AlloyDBVectorStore + """ vs = await cls.create( engine, embedding, @@ -371,6 +446,22 @@ async def afrom_documents( # type: ignore[override] metadata_json_column: str = "langchain_metadata", **kwargs: Any, ) -> AlloyDBVectorStore: + """Create an AlloyDBVectorStore instance from documents. + Args: + documents (List[Document]): Documents to add to the vector store. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. 
+ table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Returns: + AlloyDBVectorStore + """ vs = await cls.create( engine, embedding, @@ -404,6 +495,22 @@ def from_texts( # type: ignore[override] metadata_json_column: str = "langchain_metadata", **kwargs: Any, ) -> AlloyDBVectorStore: + """Create an AlloyDBVectorStore instance from texts. + Args: + texts (List[str]): Texts to add to the vector store. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. + table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". + + Returns: + AlloyDBVectorStore + """ coro = cls.afrom_texts( texts, embedding, @@ -437,6 +544,22 @@ def from_documents( # type: ignore[override] metadata_json_column: str = "langchain_metadata", **kwargs: Any, ) -> AlloyDBVectorStore: + """Create an AlloyDBVectorStore instance from documents. + Args: + documents (List[Document]): Documents to add to the vector store. + engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. + embedding_service (Embeddings): Text embedding model to use. + table_name (str): Name of the existing table or the table to be created. + content_column (str): Column that represent a Document’s page_content. Defaults to "content". + embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". + metadata_columns (List[str]): Column(s) that represent a document's metadata. + ignore_metadata_columns (List[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. + id_column (str): Column that represents the Document's id. Defaults to "langchain_id". + metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". 
+ + Returns: + AlloyDBVectorStore + """ coro = cls.afrom_documents( documents, embedding, @@ -460,6 +583,7 @@ async def __query_collection( filter: Optional[str] = None, **kwargs: Any, ) -> Sequence[RowMapping]: + """Perform similarity search query on database.""" k = k if k else self.k operator = self.distance_strategy.operator search_function = self.distance_strategy.search_function @@ -480,6 +604,7 @@ def similarity_search( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return doc selected by similarity search on query.""" return self.engine._run_as_sync( self.asimilarity_search(query, k=k, filter=filter, **kwargs) ) @@ -491,6 +616,7 @@ async def asimilarity_search( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected by similarity search on query.""" embedding = self.embedding_service.embed_query(text=query) return await self.asimilarity_search_by_vector( @@ -498,9 +624,7 @@ async def asimilarity_search( ) def _select_relevance_score_fn(self) -> Callable[[float], float]: - """ - Select a relevance function based on distance strategy - """ + """Select a relevance function based on distance strategy.""" # Calculate distance strategy provided in # vectorstore constructor if self.distance_strategy == DistanceStrategy.COSINE_DISTANCE: @@ -517,6 +641,7 @@ async def asimilarity_search_with_score( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected by similarity search on query.""" embedding = self.embedding_service.embed_query(query) docs = await self.asimilarity_search_with_score_by_vector( embedding=embedding, k=k, filter=filter, **kwargs @@ -530,6 +655,7 @@ async def asimilarity_search_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected by vector similarity search.""" docs_and_scores = await self.asimilarity_search_with_score_by_vector( embedding=embedding, k=k, filter=filter, **kwargs ) @@ -543,6 +669,7 @@ async def asimilarity_search_with_score_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected by vector similarity search.""" results = await self.__query_collection( embedding=embedding, k=k, filter=filter, **kwargs ) @@ -577,6 +704,7 @@ async def amax_marginal_relevance_search( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected using the maximal marginal relevance.""" embedding = self.embedding_service.embed_query(text=query) return await self.amax_marginal_relevance_search_by_vector( @@ -620,6 +748,7 @@ async def amax_marginal_relevance_search_with_score_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected using the maximal marginal relevance.""" results = await self.__query_collection( embedding=embedding, k=fetch_k, filter=filter, **kwargs ) @@ -663,6 +792,7 @@ def similarity_search_with_score( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected by similarity search on query.""" coro = self.asimilarity_search_with_score(query, k, filter=filter, **kwargs) return self.engine._run_as_sync(coro) @@ -673,6 +803,7 @@ def similarity_search_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected by vector similarity search.""" coro = self.asimilarity_search_by_vector(embedding, k, 
filter=filter, **kwargs) return self.engine._run_as_sync(coro) @@ -683,6 +814,7 @@ def similarity_search_with_score_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected by vector similarity search.""" coro = self.asimilarity_search_with_score_by_vector( embedding, k, filter=filter, **kwargs ) @@ -697,6 +829,7 @@ def max_marginal_relevance_search( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected using the maximal marginal relevance.""" coro = self.amax_marginal_relevance_search( query, k, @@ -716,6 +849,7 @@ def max_marginal_relevance_search_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: + """Return docs selected using the maximal marginal relevance.""" coro = self.amax_marginal_relevance_search_by_vector( embedding, k, @@ -735,6 +869,7 @@ def max_marginal_relevance_search_with_score_by_vector( filter: Optional[str] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: + """Return docs and distance scores selected using the maximal marginal relevance.""" coro = self.amax_marginal_relevance_search_with_score_by_vector( embedding, k, @@ -746,6 +881,7 @@ def max_marginal_relevance_search_with_score_by_vector( return self.engine._run_as_sync(coro) async def set_maintenance_work_mem(self, num_leaves: int, vector_size: int) -> None: + """Set database maintenance work memory (for ScaNN index creation).""" # Required index memory in MB buffer = 1 index_memory_required = ( @@ -761,6 +897,7 @@ async def aapply_vector_index( name: Optional[str] = None, concurrently: bool = False, ) -> None: + """Create index in the database.""" if isinstance(index, ExactNearestNeighbor): await self.adrop_vector_index() return @@ -785,6 +922,7 @@ async def aapply_vector_index( await self.engine._aexecute(stmt) async def areindex(self, index_name: Optional[str] = None) -> None: + """Re-index the database.""" index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX query = f"REINDEX INDEX {index_name};" await self.engine._aexecute(query) @@ -793,6 +931,7 @@ async def adrop_vector_index( self, index_name: Optional[str] = None, ) -> None: + """Drop the vector index.""" index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX query = f"DROP INDEX IF EXISTS {index_name};" await self.engine._aexecute(query) @@ -801,6 +940,7 @@ async def is_valid_index( self, index_name: Optional[str] = None, ) -> bool: + """Check if index exists in the table.""" index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX query = f""" SELECT tablename, indexname From 1017eeab743622d2b9c727139f443bbe0d3994ce Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 12:39:46 -0400 Subject: [PATCH 04/16] mod --- src/langchain_google_alloydb_pg/chat_message_history.py | 2 +- src/langchain_google_alloydb_pg/engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py index c5ce0a5f..7d7849bc 100644 --- a/src/langchain_google_alloydb_pg/chat_message_history.py +++ b/src/langchain_google_alloydb_pg/chat_message_history.py @@ -51,7 +51,7 @@ def __init__( table_name: str, messages: List[BaseMessage], ): - """_summary_ + """AlloyDBChatMessageHistory constructor. Args: key (object): Key to prevent direct constructor usage. 
diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index c54edc6b..b0707ec8 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -270,7 +270,7 @@ async def afrom_instance( ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, service_account_email: Optional[str] = None, ) -> AlloyDBEngine: - """_summary_ + """Create an AlloyDBEngine from an AlloyDB instance. Args: project_id (str): GCP project ID. From 1dd09ff893363064e84f478dd5c0b4ebaa2ac4ea Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 12:50:03 -0400 Subject: [PATCH 05/16] revert change from other pr --- src/langchain_google_alloydb_pg/engine.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index b0707ec8..bfca04f3 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -229,14 +229,12 @@ async def _create( db_user = user # otherwise use automatic IAM database authentication else: - if service_account_email: - db_user = service_account_email - else: - # get application default credentials - credentials, _ = google.auth.default( - scopes=["https://www.googleapis.com/auth/userinfo.email"] - ) - db_user = await _get_iam_principal_email(credentials) + # get application default credentials + credentials, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/userinfo.email"] + ) + db_user = await _get_iam_principal_email(credentials) + enable_iam_auth = True # anonymous function to be used for SQLAlchemy 'creator' argument async def getconn() -> asyncpg.Connection: From a248067b771ae99ce011bc65c613a60e6e3f5d92 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 13:56:20 -0400 Subject: [PATCH 06/16] delete service account impl --- src/langchain_google_alloydb_pg/engine.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index bfca04f3..dda53a28 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -138,7 +138,6 @@ def from_instance( user: Optional[str] = None, password: Optional[str] = None, ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, - service_account_email: Optional[str] = None, ) -> AlloyDBEngine: """Create an AlloyDBEngine from an AlloyDB instance. @@ -151,7 +150,7 @@ def from_instance( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): IAM service account email. Defaults to None. + iam_account_email (Optional[str], optional): IAM service account email. Defaults to None. Returns: AlloyDBEngine: A newly created AlloyDBEngine instance. @@ -172,7 +171,6 @@ def from_instance( password, loop=loop, thread=thread, - service_account_email=service_account_email, ) return asyncio.run_coroutine_threadsafe(coro, loop).result() @@ -189,7 +187,7 @@ async def _create( password: Optional[str] = None, loop: Optional[asyncio.AbstractEventLoop] = None, thread: Optional[Thread] = None, - service_account_email: Optional[str] = None, + iam_account_email: Optional[str] = None, ) -> AlloyDBEngine: """Create an AlloyDBEngine from an AlloyDB instance. 
@@ -202,7 +200,7 @@ async def _create( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): IAM service account email. + iam_account_email (Optional[str], optional): IAM service account email. Raises: ValueError: Raises error if only one of 'user' or 'password' is specified. @@ -266,7 +264,6 @@ async def afrom_instance( user: Optional[str] = None, password: Optional[str] = None, ip_type: Union[str, IPTypes] = IPTypes.PUBLIC, - service_account_email: Optional[str] = None, ) -> AlloyDBEngine: """Create an AlloyDBEngine from an AlloyDB instance. @@ -279,7 +276,7 @@ async def afrom_instance( user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. - service_account_email (Optional[str], optional): IAM service account email. Defaults to None. + iam_account_email (Optional[str], optional): IAM service account email. Defaults to None. Returns: AlloyDBEngine: A newly created AlloyDBEngine instance. @@ -293,7 +290,6 @@ async def afrom_instance( ip_type, user, password, - service_account_email=service_account_email, ) @classmethod From ce670589183063d524466c9319af08bdd5b0290b Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:22:17 -0400 Subject: [PATCH 07/16] add on --- .../chat_message_history.py | 2 +- src/langchain_google_alloydb_pg/engine.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py index 7d7849bc..1d77ae9e 100644 --- a/src/langchain_google_alloydb_pg/chat_message_history.py +++ b/src/langchain_google_alloydb_pg/chat_message_history.py @@ -27,7 +27,7 @@ async def _aget_messages( engine: AlloyDBEngine, session_id: str, table_name: str ) -> List[BaseMessage]: - """Retrieve the messages from AlloyDB""" + """Retrieve the messages from AlloyDB.""" query = f"""SELECT data, type FROM "{table_name}" WHERE session_id = :session_id ORDER BY id;""" results = await engine._afetch(query, {"session_id": session_id}) if not results: diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index dda53a28..d83a8e44 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -95,8 +95,8 @@ def __post_init__(self) -> None: """Check if initialization parameters are valid. Raises: - ValueError: Raises error if Column name is not string. - ValueError: Raises error if data_type is not type string. + ValueError: If Column name is not string. + ValueError: If data_type is not type string. """ if not isinstance(self.name, str): @@ -118,6 +118,17 @@ def __init__( loop: Optional[asyncio.AbstractEventLoop], thread: Optional[Thread], ) -> None: + """AlloyDBEngine constructor. + + Args: + key(object): Prevent direct constructor usage. + engine(AsyncEngine): Async engine to create AlloyDBEngine from. + loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. + thread (Optional[Thread] = None): Thread used to create the engine async. + + Returns: + AlloyDBEngine: A newly created AlloyDBEngine instance. 
+ """ if key != AlloyDBEngine.__create_key: raise Exception( @@ -197,9 +208,11 @@ async def _create( cluster (str): Cloud AlloyDB cluster name. instance (str): Cloud AlloyDB instance name. database (str): Database name. + ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. user (Optional[str], optional): Cloud AlloyDB user name. Defaults to None. password (Optional[str], optional): Cloud AlloyDB user password. Defaults to None. - ip_type (Union[str, IPTypes], optional): IP address type. Defaults to IPTypes.PUBLIC. + loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. + thread (Optional[Thread] = None): Thread used to create the engine async. iam_account_email (Optional[str], optional): IAM service account email. Raises: From c96517c5d611e6bcf317e119c1dd86cbb2b41129 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:23:30 -0400 Subject: [PATCH 08/16] fix error --- src/langchain_google_alloydb_pg/engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index d83a8e44..14323fe2 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -126,8 +126,8 @@ def __init__( loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. thread (Optional[Thread] = None): Thread used to create the engine async. - Returns: - AlloyDBEngine: A newly created AlloyDBEngine instance. + Raises: + Exception: If the constructor is called directly by the user. """ if key != AlloyDBEngine.__create_key: From 91f5bcf9725a0375fe3d26114bcc90b2fcddf696 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:30:36 -0400 Subject: [PATCH 09/16] fix typo --- src/langchain_google_alloydb_pg/engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 14323fe2..0daee843 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -325,7 +325,7 @@ async def _aexecute_outside_tx(self, query: str) -> None: async def _afetch( self, query: str, params: Optional[dict] = None ) -> Sequence[RowMapping]: - """Fetch results froma SQL query.""" + """Fetch results from a SQL query.""" async with self._engine.connect() as conn: result = await conn.execute(text(query), params) result_map = result.mappings() @@ -338,7 +338,7 @@ def _execute(self, query: str, params: Optional[dict] = None) -> None: return self._run_as_sync(self._aexecute(query, params)) def _fetch(self, query: str, params: Optional[dict] = None) -> Sequence[RowMapping]: - """Fetch results froma SQL query.""" + """Fetch results from a SQL query.""" return self._run_as_sync(self._afetch(query, params)) def _run_as_sync(self, coro: Awaitable[T]) -> T: From 4fee6b44a8fd18229b9f0de130bc027be54470f3 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:34:48 -0400 Subject: [PATCH 10/16] fix typo --- src/langchain_google_alloydb_pg/loader.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/langchain_google_alloydb_pg/loader.py b/src/langchain_google_alloydb_pg/loader.py index 91733b3b..d089cd80 100644 --- a/src/langchain_google_alloydb_pg/loader.py +++ b/src/langchain_google_alloydb_pg/loader.py @@ -43,19 +43,19 @@ def text_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: def 
csv_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: - """CSV documennt formatter.""" + """CSV document formatter.""" return ", ".join(str(row[column]) for column in content_columns if column in row) def yaml_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: - """YAML documennt formatter.""" + """YAML document formatter.""" return "\n".join( f"{column}: {str(row[column])}" for column in content_columns if column in row ) def json_formatter(row: Dict[str, Any], content_columns: Iterable[str]) -> str: - """JSON documennt formatter.""" + """JSON document formatter.""" dictionary: Dict[str, Any] = {} for column in content_columns: if column in row: @@ -125,7 +125,7 @@ def __init__( formatter: Callable[[Dict[str, Any], Iterable[str]], str], metadata_json_column: Optional[str] = None, ) -> None: - """AlloyDBLoader initialization. + """Create an AlloyDBLoader instance. Args: key (object): Prevent direct constructor usage. @@ -551,4 +551,11 @@ async def adelete(self, docs: List[Document]) -> None: await self.engine._aexecute(stmt, values) def delete(self, docs: List[Document]) -> None: + """ + Delete all instances of a document from the DocumentSaver table by matching the entire Document + object. + + Args: + docs (List[langchain_core.documents.Document]): a list of documents to be deleted. + """ self.engine._run_as_sync(self.adelete(docs)) From 4d5903e9b81ac4344cfce25ee444aec477437440 Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:36:05 -0400 Subject: [PATCH 11/16] fix typo 2 --- src/langchain_google_alloydb_pg/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/langchain_google_alloydb_pg/loader.py b/src/langchain_google_alloydb_pg/loader.py index d089cd80..27b54eb4 100644 --- a/src/langchain_google_alloydb_pg/loader.py +++ b/src/langchain_google_alloydb_pg/loader.py @@ -125,7 +125,7 @@ def __init__( formatter: Callable[[Dict[str, Any], Iterable[str]], str], metadata_json_column: Optional[str] = None, ) -> None: - """Create an AlloyDBLoader instance. + """AlloyDBLoader constructor. Args: key (object): Prevent direct constructor usage. @@ -139,7 +139,7 @@ def __init__( formatter (Optional[Callable], optional): A function to format page content (OneOf: format, formatter). Defaults to None. Raises: - Excaption: + Excaption: If called directly by user. """ if key != AlloyDBLoader.__create_key: raise Exception( From 2c67b76ccb2b68d1a4dc8ada1d350d3ac3ee78da Mon Sep 17 00:00:00 2001 From: duwenxin99 Date: Thu, 18 Jul 2024 17:59:26 -0400 Subject: [PATCH 12/16] more docstring improvement --- src/langchain_google_alloydb_pg/loader.py | 9 ++--- .../vectorstore.py | 40 +++++++++++++------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/langchain_google_alloydb_pg/loader.py b/src/langchain_google_alloydb_pg/loader.py index 27b54eb4..68a5f6d5 100644 --- a/src/langchain_google_alloydb_pg/loader.py +++ b/src/langchain_google_alloydb_pg/loader.py @@ -131,15 +131,14 @@ def __init__( key (object): Prevent direct constructor usage. engine (AlloyDBEngine): AsyncEngine with pool connection to the postgres database query (Optional[str], optional): SQL query. Defaults to None. - table_name (Optional[str], optional): Name of table to query. Defaults to None. content_columns (Optional[List[str]], optional): Column that represent a Document's page_content. Defaults to the first column. metadata_columns (Optional[List[str]], optional): Column(s) that represent a Document's metadata. Defaults to None. 
- metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". - format (Optional[str], optional): Format of page content (OneOf: text, csv, YAML, JSON). Defaults to 'text'. formatter (Optional[Callable], optional): A function to format page content (OneOf: format, formatter). Defaults to None. + metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata". + Raises: - Excaption: If called directly by user. + Exception: If called directly by user. """ if key != AlloyDBLoader.__create_key: raise Exception( @@ -351,7 +350,7 @@ def __init__( metadata_columns: List[str] = [], metadata_json_column: Optional[str] = None, ) -> None: - """Create an AlloyDBDocumentSaver instance. + """AlloyDBDocumentSaver constructor. Args: key (object): Prevent direct constructor usage. diff --git a/src/langchain_google_alloydb_pg/vectorstore.py b/src/langchain_google_alloydb_pg/vectorstore.py index 495699ca..353cd7bb 100644 --- a/src/langchain_google_alloydb_pg/vectorstore.py +++ b/src/langchain_google_alloydb_pg/vectorstore.py @@ -59,7 +59,7 @@ def __init__( lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, ): - """Constructor for AlloyDBVectorStore. + """AlloyDBVectorStore constructor. Args: key (object): Prevent direct constructor usage. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. @@ -119,8 +119,8 @@ async def create( index_query_options: Optional[QueryOptions] = None, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance. + Args: - key (object): Prevent direct constructor usage. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. @@ -223,6 +223,7 @@ def create_sync( index_query_options: Optional[QueryOptions] = None, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance. + Args: key (object): Prevent direct constructor usage. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. @@ -334,7 +335,7 @@ async def aadd_documents( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: - """Embed documents and add to the table""" + """Embed documents and add to the table.""" texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] ids = await self.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) @@ -358,7 +359,7 @@ def add_documents( ids: Optional[List[str]] = None, **kwargs: Any, ) -> List[str]: - """Embed documents and add to the table""" + """Embed documents and add to the table.""" return self.engine._run_as_sync(self.aadd_documents(documents, ids, **kwargs)) async def adelete( @@ -366,7 +367,7 @@ async def adelete( ids: Optional[List[str]] = None, **kwargs: Any, ) -> Optional[bool]: - """Delete records from the table""" + """Delete records from the table.""" if not ids: return False @@ -380,7 +381,7 @@ def delete( ids: Optional[List[str]] = None, **kwargs: Any, ) -> Optional[bool]: - """Delete records from the table""" + """Delete records from the table.""" return self.engine._run_as_sync(self.adelete(ids, **kwargs)) @classmethod @@ -401,11 +402,14 @@ async def afrom_texts( # type: ignore[override] **kwargs: Any, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance from texts. 
+ Args: texts (List[str]): Texts to add to the vector store. + embedding (Embeddings): Text embedding model to use. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. - embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. + metadatas (Optional[List[dict]]): List of metadatas to add to table records. + ids: (Optional[List[str]]): List of IDs to add to table records. content_column (str): Column that represent a Document’s page_content. Defaults to "content". embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". metadata_columns (List[str]): Column(s) that represent a document's metadata. @@ -447,11 +451,14 @@ async def afrom_documents( # type: ignore[override] **kwargs: Any, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance from documents. + Args: documents (List[Document]): Documents to add to the vector store. + embedding (Embeddings): Text embedding model to use. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. - embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. + metadatas (Optional[List[dict]]): List of metadatas to add to table records. + ids: (Optional[List[str]]): List of IDs to add to table records. content_column (str): Column that represent a Document’s page_content. Defaults to "content". embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". metadata_columns (List[str]): Column(s) that represent a document's metadata. @@ -462,6 +469,7 @@ async def afrom_documents( # type: ignore[override] Returns: AlloyDBVectorStore """ + vs = await cls.create( engine, embedding, @@ -496,11 +504,14 @@ def from_texts( # type: ignore[override] **kwargs: Any, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance from texts. + Args: texts (List[str]): Texts to add to the vector store. + embedding (Embeddings): Text embedding model to use. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. - embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. + metadatas (Optional[List[dict]]): List of metadatas to add to table records. + ids: (Optional[List[str]]): List of IDs to add to table records. content_column (str): Column that represent a Document’s page_content. Defaults to "content". embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". metadata_columns (List[str]): Column(s) that represent a document's metadata. @@ -545,11 +556,14 @@ def from_documents( # type: ignore[override] **kwargs: Any, ) -> AlloyDBVectorStore: """Create an AlloyDBVectorStore instance from documents. + Args: documents (List[Document]): Documents to add to the vector store. + embedding (Embeddings): Text embedding model to use. engine (AlloyDBEngine): Connection pool engine for managing connections to AlloyDB database. - embedding_service (Embeddings): Text embedding model to use. table_name (str): Name of the existing table or the table to be created. + metadatas (Optional[List[dict]]): List of metadatas to add to table records. + ids: (Optional[List[str]]): List of IDs to add to table records. 
content_column (str): Column that represent a Document’s page_content. Defaults to "content". embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". metadata_columns (List[str]): Column(s) that represent a document's metadata. @@ -604,7 +618,7 @@ def similarity_search( filter: Optional[str] = None, **kwargs: Any, ) -> List[Document]: - """Return doc selected by similarity search on query.""" + """Return docs selected by similarity search on query.""" return self.engine._run_as_sync( self.asimilarity_search(query, k=k, filter=filter, **kwargs) ) @@ -897,7 +911,7 @@ async def aapply_vector_index( name: Optional[str] = None, concurrently: bool = False, ) -> None: - """Create index in the database.""" + """Create index in the vector store table.""" if isinstance(index, ExactNearestNeighbor): await self.adrop_vector_index() return @@ -922,7 +936,7 @@ async def aapply_vector_index( await self.engine._aexecute(stmt) async def areindex(self, index_name: Optional[str] = None) -> None: - """Re-index the database.""" + """Re-index the vector store table.""" index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX query = f"REINDEX INDEX {index_name};" await self.engine._aexecute(query) From 347d8646128d62eaef0066201987ff931d8d1221 Mon Sep 17 00:00:00 2001 From: Wenxin Du <117315983+duwenxin99@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:51:01 -0400 Subject: [PATCH 13/16] Update src/langchain_google_alloydb_pg/chat_message_history.py Co-authored-by: Averi Kitsch --- src/langchain_google_alloydb_pg/chat_message_history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py index 1d77ae9e..66d83747 100644 --- a/src/langchain_google_alloydb_pg/chat_message_history.py +++ b/src/langchain_google_alloydb_pg/chat_message_history.py @@ -55,7 +55,7 @@ def __init__( Args: key (object): Key to prevent direct constructor usage. - engine (AlloyDBEngine): AlloyDB engine to use. + engine (AlloyDBEngine): database connection pool. session_id (str): Retrieve the table content with this session ID. table_name (str): Table name that stores the chat message history. messages (List[BaseMessage]): Messages to store. From 5cb46862419e4be82b5446fdb08b84a920187cc8 Mon Sep 17 00:00:00 2001 From: Wenxin Du <117315983+duwenxin99@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:51:12 -0400 Subject: [PATCH 14/16] Update src/langchain_google_alloydb_pg/engine.py Co-authored-by: Averi Kitsch --- src/langchain_google_alloydb_pg/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 0daee843..273b8d73 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -122,7 +122,7 @@ def __init__( Args: key(object): Prevent direct constructor usage. - engine(AsyncEngine): Async engine to create AlloyDBEngine from. + engine(AsyncEngine): Async engine connection pool. loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. thread (Optional[Thread] = None): Thread used to create the engine async. 
From 2fa71c9be73e7ced13868d9d41928af37e0f036a Mon Sep 17 00:00:00 2001 From: Wenxin Du <117315983+duwenxin99@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:51:24 -0400 Subject: [PATCH 15/16] Update src/langchain_google_alloydb_pg/engine.py Co-authored-by: Averi Kitsch --- src/langchain_google_alloydb_pg/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 273b8d73..3f9ffea7 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -307,7 +307,7 @@ async def afrom_instance( @classmethod def from_engine(cls: Type[AlloyDBEngine], engine: AsyncEngine) -> AlloyDBEngine: - """Create an AlloyDBEngine instance from engine.""" + """Create an AlloyDBEngine instance from an AsyncEngine.""" return cls(cls.__create_key, engine, None, None) async def _aexecute(self, query: str, params: Optional[dict] = None) -> None: From eba776732906b0561f95d904fe6fa01cd2532cf5 Mon Sep 17 00:00:00 2001 From: duwenxin Date: Mon, 22 Jul 2024 12:00:27 -0400 Subject: [PATCH 16/16] resolve comment --- src/langchain_google_alloydb_pg/engine.py | 24 +++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/langchain_google_alloydb_pg/engine.py b/src/langchain_google_alloydb_pg/engine.py index 3f9ffea7..2ad90cfd 100644 --- a/src/langchain_google_alloydb_pg/engine.py +++ b/src/langchain_google_alloydb_pg/engine.py @@ -360,7 +360,7 @@ async def ainit_vectorstore_table( store_metadata: bool = True, ) -> None: """ - Create a table for saving of vectors to be used with Alloy DB. + Create a table for saving of vectors to be used with AlloyDB. If table already exists and overwrite flag is not set, a TABLE_ALREADY_EXISTS error is thrown. Args: @@ -414,7 +414,7 @@ def init_vectorstore_table( store_metadata: bool = True, ) -> None: """ - Create a table for saving of vectors to be used with Alloy DB. + Create a table for saving of vectors to be used with AlloyDB. If table already exists and overwrite flag is not set, a TABLE_ALREADY_EXISTS error is thrown. Args: @@ -452,7 +452,15 @@ def init_vectorstore_table( ) async def ainit_chat_history_table(self, table_name: str) -> None: - """Create a new chat history table.""" + """ + Create an AlloyDB table to save chat history messages. + + Args: + table_name (str): The table name to store chat history. + + Returns: + None + """ create_table_query = f"""CREATE TABLE IF NOT EXISTS "{table_name}"( id SERIAL PRIMARY KEY, session_id TEXT NOT NULL, @@ -462,7 +470,15 @@ async def ainit_chat_history_table(self, table_name: str) -> None: await self._aexecute(create_table_query) def init_chat_history_table(self, table_name: str) -> None: - """Create a new chat history table.""" + """ + Create an AlloyDB table to save chat history messages. + + Args: + table_name (str): The table name to store chat history. + + Returns: + None + """ return self._run_as_sync( self.ainit_chat_history_table( table_name,
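
Reviewer note: the sketch below pulls together the surfaces touched by this series -- engine creation, vector store and chat history table initialization, and index application -- as one end-to-end usage example. It is assembled from the docstrings added above, not taken from the repository; the project/region/table values and the DeterministicFakeEmbedding stand-in (any LangChain Embeddings implementation would do) are assumptions for illustration only.

import asyncio

from langchain_core.embeddings import DeterministicFakeEmbedding  # stand-in embedding model

from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBVectorStore
from langchain_google_alloydb_pg.indexes import HNSWIndex


async def main() -> None:
    # Async engine creation, per the afrom_instance docstring (placeholder values).
    engine = await AlloyDBEngine.afrom_instance(
        project_id="my-project",
        region="us-central1",
        cluster="my-cluster",
        instance="my-instance",
        database="my-database",
    )

    # Tables documented in patch 16; vector_size matches the stand-in embedding below.
    await engine.ainit_vectorstore_table(table_name="my_docs", vector_size=768)
    await engine.ainit_chat_history_table(table_name="chat_history")

    # Vector store creation via the documented async factory.
    store = await AlloyDBVectorStore.create(
        engine=engine,
        embedding_service=DeterministicFakeEmbedding(size=768),
        table_name="my_docs",
    )

    # Add records, apply an HNSW index to the vector store table, then query.
    await store.aadd_texts(["hello world"], metadatas=[{"source": "example"}])
    await store.aapply_vector_index(HNSWIndex(name="my_hnsw_index"))
    docs = await store.asimilarity_search("hello", k=1)
    print(docs)


asyncio.run(main())

Dropping the index (adrop_vector_index) or rebuilding it (areindex) follows the same pattern as aapply_vector_index above.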