Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to specify credentials when using Google BigQuery as a data loader #5466

Merged
merged 6 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions langchain/document_loaders/bigquery.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from typing import List, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader

if TYPE_CHECKING:
from google.auth.credentials import Credentials


class BigQueryLoader(BaseLoader):
"""Loads a query result from BigQuery into a list of documents.
Expand All @@ -11,6 +16,7 @@ class BigQueryLoader(BaseLoader):
are written into the `page_content` of the document. The `metadata_columns`
are written into the `metadata` of the document. By default, all columns
are written into the `page_content` and none into the `metadata`.

"""

def __init__(
Expand All @@ -19,11 +25,28 @@ def __init__(
project: Optional[str] = None,
page_content_columns: Optional[List[str]] = None,
metadata_columns: Optional[List[str]] = None,
credentials: Optional[Credentials] = None,
):
"""Initialize BigQuery document loader.

Args:
query: The query to run in BigQuery.
project: Optional. The project to run the query in.
page_content_columns: Optional. The columns to write into the `page_content`
of the document.
metadata_columns: Optional. The columns to write into the `metadata` of the
document.
credentials: Optional. A `google.auth.credentials.Credentials` object for
accessing Google APIs. Use this parameter to override
default credentials, such as to use Compute Engine
(`google.auth.compute_engine.Credentials`) or Service Account
(`google.oauth2.service_account.Credentials`) credentials directly.
"""
self.query = query
self.project = project
self.page_content_columns = page_content_columns
self.metadata_columns = metadata_columns
self.credentials = credentials

def load(self) -> List[Document]:
try:
Expand All @@ -34,7 +57,7 @@ def load(self) -> List[Document]:
"Please install it with `pip install google-cloud-bigquery`."
) from ex

bq_client = bigquery.Client(self.project)
bq_client = bigquery.Client(credentials=self.credentials, project=self.project)
query_result = bq_client.query(self.query).result()
docs: List[Document] = []

Expand Down
7 changes: 3 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pymongo = {version = "^4.3.3", optional = true}
clickhouse-connect = {version="^0.5.14", optional=true}
weaviate-client = {version = "^3", optional = true}
google-api-python-client = {version = "2.70.0", optional = true}
google-auth = {version = "^2.18.1", optional = true}
wolframalpha = {version = "5.0.0", optional = true}
anthropic = {version = "^0.2.6", optional = true}
qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"}
Expand Down Expand Up @@ -239,6 +240,7 @@ all = [
"weaviate-client",
"redis",
"google-api-python-client",
"google-auth",
"wolframalpha",
"qdrant-client",
"tensorflow-text",
Expand Down