Skip to content

Commit

Permalink
remove top level imports for reg
Browse files — browse the repository at this point in the history
  • Loading branch information
mpgreg committed Dec 7, 2023
1 parent 9787277 commit 7ed66af
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 13 deletions.
12 changes: 9 additions & 3 deletions dags/ingestion/FinSum_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@
import unicodedata
import uuid

OPENAI_CONN_ID = "openai_default"

logger = logging.getLogger("airflow.task")

edgar_headers={"User-Agent": "test1@test1.com"}

openai_hook = OpenAIHook("openai_default")
openai_client.api_key = openai_hook._get_api_key()

default_args = {"retries": 3, "retry_delay": 30, "trigger_rule": "none_failed"}


Expand Down Expand Up @@ -247,6 +246,8 @@ def vectorize(df: pd.DataFrame, content_column_name: str, output_file_name: Path
:param output_file_name: Path for saving embeddings as a parquet file
:return: Location of saved file
"""

openai_hook = OpenAIHook(OPENAI_CONN_ID)

df["id"] = df[content_column_name].apply(
lambda x: str(uuid.uuid5(
Expand All @@ -273,6 +274,9 @@ def chunk_summarization_openai(content: str, ticker: str, fy: str, fp: str) -> s
:param fp: The fiscal period of the document chunk for (status printing).
:return: A summary string
"""

openai_client.api_key = OpenAIHook(OPENAI_CONN_ID)._get_api_key()

logger.info(f"Summarizing chunk for ticker {ticker} {fy}:{fp}")

response = openai_client.ChatCompletion().create(
Expand Down Expand Up @@ -304,6 +308,8 @@ def doc_summarization_openai(content: str, doc_link: str) -> str:
:param doc_link: The URL of the document being summarized (status printing).
:return: A summary string
"""

openai_client.api_key = OpenAIHook(OPENAI_CONN_ID)._get_api_key()

logger.info(f"Summarizing document for {doc_link}")

Expand Down
19 changes: 15 additions & 4 deletions dags/ingestion/FinSum_pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,13 @@
import unicodedata
import uuid

PINECONE_CONN_ID = "pinecone_default"
OPENAI_CONN_ID = "openai_default"

logger = logging.getLogger("airflow.task")

edgar_headers={"User-Agent": "test1@test1.com"}

pinecone_hook = PineconeHook("pinecone_default")
openai_hook = OpenAIHook("openai_default")
openai_client.api_key = openai_hook._get_api_key()

index_names = ["tenq", "tenq-summary"]

default_args = {"retries": 3, "retry_delay": 30, "trigger_rule": "none_failed"}
Expand Down Expand Up @@ -68,13 +67,17 @@ def check_indexes() -> [str]:
"""
Check if indexes exists.
"""

pinecone_hook = PineconeHook(PINECONE_CONN_ID)

if set(index_names).issubset(set(pinecone_hook.list_indexes())):
return ["extract"]
else:
return ["create_indexes"]

def create_indexes(existing: str = "ignore"):

pinecone_hook = PineconeHook(PINECONE_CONN_ID)

for index_name in index_names:

Expand Down Expand Up @@ -276,6 +279,9 @@ def pinecone_ingest(df: pd.DataFrame, content_column_name: str, index_name: str)
:param index_name: The name of the index to import data.
"""

openai_hook = OpenAIHook(OPENAI_CONN_ID)
pinecone_hook = PineconeHook(PINECONE_CONN_ID)

df["metadata"] = df.drop([content_column_name], axis=1).to_dict('records')

df["id"] = df[content_column_name].apply(lambda x: str(uuid.uuid5(name=x+index_name,
Expand Down Expand Up @@ -304,6 +310,9 @@ def chunk_summarization_openai(content: str, ticker: str, fy: str, fp: str) -> s
:param fp: The fiscal period of the document chunk for (status printing).
:return: A summary string
"""

openai_client.api_key = OpenAIHook(OPENAI_CONN_ID)._get_api_key()

logger.info(f"Summarizing chunk for ticker {ticker} {fy}:{fp}")

response = openai_client.ChatCompletion().create(
Expand Down Expand Up @@ -336,6 +345,8 @@ def doc_summarization_openai(content: str, doc_link: str) -> str:
:return: A summary string
"""

openai_client.api_key = OpenAIHook(OPENAI_CONN_ID)._get_api_key()

logger.info(f"Summarizing document for {doc_link}")

response = openai_client.ChatCompletion().create(
Expand Down
18 changes: 12 additions & 6 deletions dags/ingestion/FinSum_weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@

edgar_headers={"User-Agent": "test1@test1.com"}

openai_hook = OpenAIHook("openai_default")
openai_client.api_key = openai_hook._get_api_key()

weaviate_hook = _WeaviateHook("weaviate_default")
weaviate_client = weaviate_hook.get_client()

class_names = ["TenQ", "TenQSummary"]

default_args = {"retries": 3, "retry_delay": 30, "trigger_rule": "none_failed"}
Expand Down Expand Up @@ -77,6 +71,8 @@ def check_schemas() -> str:
so check_schema_subset is used recursively to check that all objects in the requested schema are
represented in the current schema.
"""

weaviate_hook = _WeaviateHook("weaviate_default")

class_objects = get_schema()

Expand All @@ -90,6 +86,9 @@ def create_schemas():
"""
Creates the weaviate class schemas.
"""

weaviate_hook = _WeaviateHook("weaviate_default")

class_objects = get_schema()
weaviate_hook.create_schema(class_objects=class_objects, existing="ignore")

Expand Down Expand Up @@ -277,6 +276,8 @@ def weaviate_ingest(df: pd.DataFrame, class_name: str):
type class_name: str
"""

weaviate_hook = _WeaviateHook("weaviate_default")

df, uuid_column = weaviate_hook.generate_uuids(df=df, class_name=class_name)

weaviate_hook.ingest_data(
Expand All @@ -299,6 +300,9 @@ def chunk_summarization_openai(content: str, ticker: str, fy: str, fp: str) -> s
:param fp: The fiscal period of the document chunk for (status printing).
:return: A summary string
"""

openai_client.api_key = OpenAIHook("openai_default")._get_api_key()

logger.info(f"Summarizing chunk for ticker {ticker} {fy}:{fp}")

response = openai_client.ChatCompletion().create(
Expand Down Expand Up @@ -331,6 +335,8 @@ def doc_summarization_openai(content: str, doc_link: str) -> str:
:return: A summary string
"""

openai_client.api_key = OpenAIHook("openai_default")._get_api_key()

logger.info(f"Summarizing document for {doc_link}")

response = openai_client.ChatCompletion().create(
Expand Down

0 comments on commit 7ed66af

Please sign in to comment.