From a77b0094c7be0a87693786c288b99f87d26806fe Mon Sep 17 00:00:00 2001 From: PhilippGawlik Date: Thu, 18 Jul 2024 10:55:18 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Added=20backend=20function?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 13 ++++++--- config.py | 10 +++++++ config.yaml | 25 ++++++++++++++++++ interface/__init__.py | 0 interface/response_models.py | 2 +- requirements.txt | 46 ++++++++++++++++++++++++++++++++ src/__init__.py | 0 src/context.py | 51 ++++++++++++++++++++++++++++++++++++ src/generate_with_azure.py | 32 ++++++++++++++++++++++ src/generate_with_openai.py | 29 ++++++++++++++++++++ src/prompt.py | 22 ++++++++++++++++ 11 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 config.py create mode 100644 interface/__init__.py create mode 100644 src/__init__.py create mode 100644 src/context.py create mode 100644 src/generate_with_azure.py create mode 100644 src/generate_with_openai.py create mode 100644 src/prompt.py diff --git a/api.py b/api.py index 54586fe..14a44cc 100644 --- a/api.py +++ b/api.py @@ -6,6 +6,10 @@ from interface.response_models import ResponseModel from interface.request_models import RequestModel +from src.context import get_context +#from src.generate_with_azure import generate_answer +from src.generate_with_openai import generate_answer +from src.prompt import assemble_prompt APP = FastAPI( @@ -44,12 +48,15 @@ async def redirect(): response_model=ResponseModel ) def answer_a_question(query: RequestModel) -> ResponseModel: + context = get_context(query.question) + prompt = assemble_prompt(query.question, context) + answer = generate_answer(prompt) return ResponseModel( status="ok", msg="Successfully generated answer", - answer="Working on it", - cta=[], - refs=[] + answer=answer, + cta=[c.metadata["metadata_storage_path"] for c in context], + refs=[c.metadata["title"] for c in context] ) diff --git a/config.py b/config.py new 
file mode 100644 index 0000000..d51e018 --- /dev/null +++ b/config.py @@ -0,0 +1,10 @@ +import os + +AZURE_ENDPOINT: str = os.environ["AZURE_ENDPOINT"] +AZURE_OPENAI_API_KEY: str = os.environ["AZURE_OPENAI_API_KEY"] +AZURE_OPENAI_API_VERSION: str = os.environ["AZURE_OPENAI_API_VERSION"] +AZURE_OPENAI_DEPLOYMENT: str = os.environ["AZURE_OPENAI_DEPLOYMENT"] +AZURE_DEPLOYMENT: str = os.environ["AZURE_DEPLOYMENT"] +VECTOR_STORE_ADDRESS: str = os.environ["VECTOR_STORE_ADDRESS"] +VECTOR_STORE_PASSWORD: str = os.environ["VECTOR_STORE_PASSWORD"] +INDEX_NAME: str = os.environ["INDEX_NAME"] diff --git a/config.yaml b/config.yaml index f05b474..26c6a9d 100644 --- a/config.yaml +++ b/config.yaml @@ -4,6 +4,31 @@ install: command: apt update && pip install -r requirements.txt run: command: python api.py +envs: + - name: AZURE_ENDPOINT + value: https://hackathon-openai-1.openai.azure.com/ + - name: AZURE_OPENAI_API_KEY + value: sm://{{.projectId}}/AZURE_OPENAI_API_KEY + - name: AZURE_OPENAI_API_VERSION + value: "2024-06-01" + - name: AZURE_OPENAI_DEPLOYMENT + value: "alt-text-gpt-4" + - name: AZURE_DEPLOYMENT + value: "Hackathon-Embeddings-ADA" + - name: VECTOR_STORE_ADDRESS + value: "https://hackathon-ai-search-1.search.windows.net" + - name: VECTOR_STORE_PASSWORD + value: sm://{{.projectId}}/VECTOR_STORE_PASSWORD + - name: INDEX_NAME + value: "buddy-ois1" + - name: AZURESEARCH_FIELDS_CONTENT_VECTOR + value: "text_vector" + - name: AZURESEARCH_FIELDS_CONTENT + value: "chunk" + - name: AZURESEARCH_FIELDS_ID + value: "chunk_id" + - name: OPENAI_API_KEY + value: sm://{{.projectId}}/openai-lab-token settings: type: service security: diff --git a/interface/__init__.py b/interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/interface/response_models.py b/interface/response_models.py index f61b426..16a088a 100644 --- a/interface/response_models.py +++ b/interface/response_models.py @@ -4,7 +4,7 @@ class ResponseModel(BaseModel): answer: str - refs: 
list[Mapping] + refs: list[str] status: str msg: str cta: list[Any] diff --git a/requirements.txt b/requirements.txt index 80b2d48..9f86dc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,34 +1,80 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 annotated-types==0.7.0 anyio==4.4.0 +async-timeout==4.0.3 +attrs==23.2.0 +azure-common==1.1.28 +azure-core==1.30.2 +azure-identity==1.17.1 +azure-search-documents==11.4.0 certifi==2024.7.4 +cffi==1.16.0 +charset-normalizer==3.3.2 click==8.1.7 +cryptography==42.0.8 +dataclasses-json==0.6.7 +distro==1.9.0 dnspython==2.6.1 email_validator==2.2.0 exceptiongroup==1.2.2 fastapi==0.111.1 fastapi-cli==0.0.4 +frozenlist==1.4.1 +greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 httpx==0.27.0 idna==3.7 +isodate==0.6.1 Jinja2==3.1.4 +jsonpatch==1.33 +jsonpointer==3.0.0 +langchain==0.2.9 +langchain-community==0.2.7 +langchain-core==0.2.20 +langchain-openai==0.1.16 +langchain-text-splitters==0.2.2 +langsmith==0.1.88 markdown-it-py==3.0.0 MarkupSafe==2.1.5 +marshmallow==3.21.3 mdurl==0.1.2 +msal==1.30.0 +msal-extensions==1.2.0 +multidict==6.0.5 +mypy-extensions==1.0.0 +numpy==1.26.4 +openai==1.35.14 +orjson==3.10.6 +packaging==23.2 +portalocker==2.10.1 +pycparser==2.22 pydantic==2.8.2 pydantic_core==2.20.1 Pygments==2.18.0 +PyJWT==2.8.0 python-dotenv==1.0.1 python-multipart==0.0.9 PyYAML==6.0.1 +regex==2024.5.15 +requests==2.32.3 rich==13.7.1 shellingham==1.5.4 +six==1.16.0 sniffio==1.3.1 +SQLAlchemy==2.0.31 starlette==0.37.2 +tenacity==8.5.0 +tiktoken==0.7.0 +tqdm==4.66.4 typer==0.12.3 +typing-inspect==0.9.0 typing_extensions==4.12.2 +urllib3==2.2.2 uvicorn==0.30.1 uvloop==0.19.0 watchfiles==0.22.0 websockets==12.0 +yarl==1.9.4 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/context.py b/src/context.py new file mode 100644 index 0000000..8d9163c --- /dev/null +++ b/src/context.py @@ -0,0 +1,51 @@ +import os + +os.environ["AZURESEARCH_FIELDS_ID"] = "chunk_id" 
+os.environ["AZURESEARCH_FIELDS_CONTENT"] = "chunk" +os.environ["AZURESEARCH_FIELDS_CONTENT_VECTOR"] = "text_vector" + +from typing import Any, Optional + +from langchain_community.vectorstores.azuresearch import AzureSearch +from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings +from langchain_core.documents.base import Document + +from config import ( + AZURE_ENDPOINT, + AZURE_OPENAI_API_KEY, + AZURE_OPENAI_API_VERSION, + AZURE_DEPLOYMENT, + VECTOR_STORE_ADDRESS, + VECTOR_STORE_PASSWORD, + INDEX_NAME +) + + +EMBEDDINGS: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings( + azure_deployment=AZURE_DEPLOYMENT, + #openai_api_version=azure_openai_api_version, + azure_endpoint=AZURE_ENDPOINT, + api_key=AZURE_OPENAI_API_KEY +) +VECTOR_STORE: AzureSearch = AzureSearch( + azure_search_endpoint=VECTOR_STORE_ADDRESS, + azure_search_key=VECTOR_STORE_PASSWORD, + index_name=INDEX_NAME, + embedding_function=EMBEDDINGS.embed_query +) + +def get_context( + query: str, + k: int = 3, + search_type: str = "hybrid" +) -> list[Optional[Document]]: + context = [] + docs = VECTOR_STORE.similarity_search( + query=query, + k=3, + search_type=search_type, + ) + if docs is not None and docs: + context.extend(docs) + + return context diff --git a/src/generate_with_azure.py b/src/generate_with_azure.py new file mode 100644 index 0000000..4f32d7d --- /dev/null +++ b/src/generate_with_azure.py @@ -0,0 +1,32 @@ +import os +from typing import Optional + +from langchain_core.messages import HumanMessage +from langchain_core.documents.base import Document +from langchain_openai import AzureChatOpenAI + +#from config import AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_ENDPOINT, AZURE_OPENAI_DEPLOYMENT + + +#os.environ["AZURE_OPENAI_API_KEY"] = "d647338022c248e3be1646d10a1896dd" +##os.environ["AZURE_OPENAI_ENDPOINT"] = "https://hackathon-openai-1.openai.azure.com/openai/deployments/alt-text-gpt-4/chat/completions" +#os.environ["AZURE_OPENAI_ENDPOINT"] = 
"https://hackathon-openai-1.openai.azure.com" +#os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview" +#os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "chat" + +#model = AzureChatOpenAI( +# openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], +# azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], +#) +# +#def generate_answer(question: str, context: list[Optional[Document]]) -> str: +# message = HumanMessage( +# content="Translate this sentence from English to French. I love programming." +# ) +# print(model.invoke([message])) +# return "test" +# +# +#if __name__ == "__main__": +# generate_answer("bal", []) +# \ No newline at end of file diff --git a/src/generate_with_openai.py b/src/generate_with_openai.py new file mode 100644 index 0000000..82b262b --- /dev/null +++ b/src/generate_with_openai.py @@ -0,0 +1,29 @@ +from openai import OpenAI + + +CLIENT = OpenAI() + +SYSTEM_PROMPT = ( + "Du hilfst Mitarbeitenden beim Bayerischen Rundfunk bei ihren Fragen rund um den BR. " + "Die bist ein bayerisches Uhrgestein. " + "Dein Name ist 'Buddy'. " + "Du fragst nach, wenn Fragen zu allgemein formuliert sind, um so die Anwort einzugrenzen. " + "Du erfindest niemals Antworten. " + "Du bist immer freundlich und geduldigt. " + "Du erklärst in einfachen Worten." 
+) + + +def generate_answer(prompt: str, system_prompt: str=SYSTEM_PROMPT) -> str: + completion = CLIENT.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ] + ) + return completion.choices[0].message.content + + +if __name__ == "__main__": + print(generate_answer("Tell me a joke")) \ No newline at end of file diff --git a/src/prompt.py b/src/prompt.py new file mode 100644 index 0000000..d42fef2 --- /dev/null +++ b/src/prompt.py @@ -0,0 +1,22 @@ + +from typing import Optional +from langchain_core.documents.base import Document + + +RAG_PROMPT_TEMPLATE = ( + "Beantworte die Frage basierend auf den folgenden Fakten:" + "\n\n" + "{bulletpoints}" + "\n\n" + "Frage: {question}" +) + + +def assemble_prompt(question: str, context: list[Optional[Document]], template: str = RAG_PROMPT_TEMPLATE) -> str: + bulletpoints = "\n- ".join([c.page_content.replace("\n", "") for c in context]) + bulletpoints = f"- {bulletpoints}" + return template.format( + bulletpoints=bulletpoints, + question=question + ) +