From a77b0094c7be0a87693786c288b99f87d26806fe Mon Sep 17 00:00:00 2001 From: PhilippGawlik Date: Thu, 18 Jul 2024 10:55:18 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Added=20backend=20function?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 13 ++++++--- config.py | 10 +++++++ config.yaml | 25 ++++++++++++++++++ interface/__init__.py | 0 interface/response_models.py | 2 +- requirements.txt | 46 ++++++++++++++++++++++++++++++++ src/__init__.py | 0 src/context.py | 51 ++++++++++++++++++++++++++++++++++++ src/generate_with_azure.py | 32 ++++++++++++++++++++++ src/generate_with_openai.py | 29 ++++++++++++++++++++ src/prompt.py | 22 ++++++++++++++++ 11 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 config.py create mode 100644 interface/__init__.py create mode 100644 src/__init__.py create mode 100644 src/context.py create mode 100644 src/generate_with_azure.py create mode 100644 src/generate_with_openai.py create mode 100644 src/prompt.py diff --git a/api.py b/api.py index 54586fe..14a44cc 100644 --- a/api.py +++ b/api.py @@ -6,6 +6,10 @@ from interface.response_models import ResponseModel from interface.request_models import RequestModel +from src.context import get_context +#from src.generate_with_azure import generate_answer +from src.generate_with_openai import generate_answer +from src.prompt import assemble_prompt APP = FastAPI( @@ -44,12 +48,15 @@ async def redirect(): response_model=ResponseModel ) def answer_a_question(query: RequestModel) -> ResponseModel: + context = get_context(query.question) + prompt = assemble_prompt(query.question, context) + answer = generate_answer(prompt) return ResponseModel( status="ok", msg="Successfully generated answer", - answer="Working on it", - cta=[], - refs=[] + answer=answer, + cta=[c.metadata["metadata_storage_path"] for c in context], + refs=[c.metadata["title"] for c in context] ) diff --git a/config.py b/config.py new 
file mode 100644 index 0000000..d51e018 --- /dev/null +++ b/config.py @@ -0,0 +1,10 @@ +import os + +AZURE_ENDPOINT: str = os.environ["AZURE_ENDPOINT"] +AZURE_OPENAI_API_KEY: str = os.environ["AZURE_OPENAI_API_KEY"] +AZURE_OPENAI_API_VERSION: str = os.environ["AZURE_OPENAI_API_VERSION"] +AZURE_OPENAI_DEPLOYMENT: str = os.environ["AZURE_OPENAI_DEPLOYMENT"] +AZURE_DEPLOYMENT: str = os.environ["AZURE_DEPLOYMENT"] +VECTOR_STORE_ADDRESS: str = os.environ["VECTOR_STORE_ADDRESS"] +VECTOR_STORE_PASSWORD: str = os.environ["VECTOR_STORE_PASSWORD"] +INDEX_NAME: str = os.environ["INDEX_NAME"] diff --git a/config.yaml b/config.yaml index f05b474..26c6a9d 100644 --- a/config.yaml +++ b/config.yaml @@ -4,6 +4,31 @@ install: command: apt update && pip install -r requirements.txt run: command: python api.py +envs: + - name: AZURE_ENDPOINT + value: https://hackathon-openai-1.openai.azure.com/ + - name: AZURE_OPENAI_API_KEY + value: sm://{{.projectId}}/AZURE_OPENAI_API_KEY + - name: AZURE_OPENAI_API_VERSION + value: "2024-06-01" + - name: AZURE_OPENAI_DEPLOYMENT + value: "alt-text-gpt-4" + - name: AZURE_DEPLOYMENT + value: "Hackathon-Embeddings-ADA" + - name: VECTOR_STORE_ADDRESS + value: "https://hackathon-ai-search-1.search.windows.net" + - name: VECTOR_STORE_PASSWORD + value: sm://{{.projectId}}/VECTOR_STORE_PASSWORD + - name: INDEX_NAME + value: "buddy-ois1" + - name: AZURESEARCH_FIELDS_CONTENT_VECTOR + value: "text_vector" + - name: AZURESEARCH_FIELDS_CONTENT + value: "chunk" + - name: AZURESEARCH_FIELDS_ID + value: "chunk_id" + - name: OPENAI_API_KEY + value: sm://{{.projectId}}/openai-lab-token settings: type: service security: diff --git a/interface/__init__.py b/interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/interface/response_models.py b/interface/response_models.py index f61b426..16a088a 100644 --- a/interface/response_models.py +++ b/interface/response_models.py @@ -4,7 +4,7 @@ class ResponseModel(BaseModel): answer: str - refs: 
list[Mapping] + refs: list[str] status: str msg: str cta: list[Any] diff --git a/requirements.txt b/requirements.txt index 80b2d48..9f86dc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,34 +1,80 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 annotated-types==0.7.0 anyio==4.4.0 +async-timeout==4.0.3 +attrs==23.2.0 +azure-common==1.1.28 +azure-core==1.30.2 +azure-identity==1.17.1 +azure-search-documents==11.4.0 certifi==2024.7.4 +cffi==1.16.0 +charset-normalizer==3.3.2 click==8.1.7 +cryptography==42.0.8 +dataclasses-json==0.6.7 +distro==1.9.0 dnspython==2.6.1 email_validator==2.2.0 exceptiongroup==1.2.2 fastapi==0.111.1 fastapi-cli==0.0.4 +frozenlist==1.4.1 +greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 httpx==0.27.0 idna==3.7 +isodate==0.6.1 Jinja2==3.1.4 +jsonpatch==1.33 +jsonpointer==3.0.0 +langchain==0.2.9 +langchain-community==0.2.7 +langchain-core==0.2.20 +langchain-openai==0.1.16 +langchain-text-splitters==0.2.2 +langsmith==0.1.88 markdown-it-py==3.0.0 MarkupSafe==2.1.5 +marshmallow==3.21.3 mdurl==0.1.2 +msal==1.30.0 +msal-extensions==1.2.0 +multidict==6.0.5 +mypy-extensions==1.0.0 +numpy==1.26.4 +openai==1.35.14 +orjson==3.10.6 +packaging==23.2 +portalocker==2.10.1 +pycparser==2.22 pydantic==2.8.2 pydantic_core==2.20.1 Pygments==2.18.0 +PyJWT==2.8.0 python-dotenv==1.0.1 python-multipart==0.0.9 PyYAML==6.0.1 +regex==2024.5.15 +requests==2.32.3 rich==13.7.1 shellingham==1.5.4 +six==1.16.0 sniffio==1.3.1 +SQLAlchemy==2.0.31 starlette==0.37.2 +tenacity==8.5.0 +tiktoken==0.7.0 +tqdm==4.66.4 typer==0.12.3 +typing-inspect==0.9.0 typing_extensions==4.12.2 +urllib3==2.2.2 uvicorn==0.30.1 uvloop==0.19.0 watchfiles==0.22.0 websockets==12.0 +yarl==1.9.4 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/context.py b/src/context.py new file mode 100644 index 0000000..8d9163c --- /dev/null +++ b/src/context.py @@ -0,0 +1,51 @@ +import os + +os.environ["AZURESEARCH_FIELDS_ID"] = "chunk_id" 
+os.environ["AZURESEARCH_FIELDS_CONTENT"] = "chunk" +os.environ["AZURESEARCH_FIELDS_CONTENT_VECTOR"] = "text_vector" + +from typing import Any, Optional + +from langchain_community.vectorstores.azuresearch import AzureSearch +from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings +from langchain_core.documents.base import Document + +from config import ( + AZURE_ENDPOINT, + AZURE_OPENAI_API_KEY, + AZURE_OPENAI_API_VERSION, + AZURE_DEPLOYMENT, + VECTOR_STORE_ADDRESS, + VECTOR_STORE_PASSWORD, + INDEX_NAME +) + + +EMBEDDINGS: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings( + azure_deployment=AZURE_DEPLOYMENT, + #openai_api_version=azure_openai_api_version, + azure_endpoint=AZURE_ENDPOINT, + api_key=AZURE_OPENAI_API_KEY +) +VECTOR_STORE: AzureSearch = AzureSearch( + azure_search_endpoint=VECTOR_STORE_ADDRESS, + azure_search_key=VECTOR_STORE_PASSWORD, + index_name=INDEX_NAME, + embedding_function=EMBEDDINGS.embed_query +) + +def get_context( + query: str, + k: int = 3, + search_type: str = "hybrid" +) -> list[Optional[Document]]: + context = [] + docs = VECTOR_STORE.similarity_search( + query=query, + k=3, + search_type=search_type, + ) + if docs is not None and docs: + context.extend(docs) + + return context diff --git a/src/generate_with_azure.py b/src/generate_with_azure.py new file mode 100644 index 0000000..4f32d7d --- /dev/null +++ b/src/generate_with_azure.py @@ -0,0 +1,32 @@ +import os +from typing import Optional + +from langchain_core.messages import HumanMessage +from langchain_core.documents.base import Document +from langchain_openai import AzureChatOpenAI + +#from config import AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_ENDPOINT, AZURE_OPENAI_DEPLOYMENT + + +#os.environ["AZURE_OPENAI_API_KEY"] = "d647338022c248e3be1646d10a1896dd" +##os.environ["AZURE_OPENAI_ENDPOINT"] = "https://hackathon-openai-1.openai.azure.com/openai/deployments/alt-text-gpt-4/chat/completions" +#os.environ["AZURE_OPENAI_ENDPOINT"] = 
"https://hackathon-openai-1.openai.azure.com" +#os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview" +#os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "chat" + +#model = AzureChatOpenAI( +# openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], +# azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], +#) +# +#def generate_answer(question: str, context: list[Optional[Document]]) -> str: +# message = HumanMessage( +# content="Translate this sentence from English to French. I love programming." +# ) +# print(model.invoke([message])) +# return "test" +# +# +#if __name__ == "__main__": +# generate_answer("bal", []) +# \ No newline at end of file diff --git a/src/generate_with_openai.py b/src/generate_with_openai.py new file mode 100644 index 0000000..82b262b --- /dev/null +++ b/src/generate_with_openai.py @@ -0,0 +1,29 @@ +from openai import OpenAI + + +CLIENT = OpenAI() + +SYSTEM_PROMPT = ( + "Du hilfst Mitarbeitenden beim Bayerischen Rundfunk bei ihren Fragen rund um den BR. " + "Die bist ein bayerisches Uhrgestein. " + "Dein Name ist 'Buddy'. " + "Du fragst nach, wenn Fragen zu allgemein formuliert sind, um so die Anwort einzugrenzen. " + "Du erfindest niemals Antworten. " + "Du bist immer freundlich und geduldigt. " + "Du erklärst in einfachen Worten." 
+) + + +def generate_answer(prompt: str, system_prompt: str=SYSTEM_PROMPT) -> str: + completion = CLIENT.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ] + ) + return completion.choices[0].message.content + + +if __name__ == "__main__": + print(generate_answer("Tell me a joke")) \ No newline at end of file diff --git a/src/prompt.py b/src/prompt.py new file mode 100644 index 0000000..d42fef2 --- /dev/null +++ b/src/prompt.py @@ -0,0 +1,22 @@ + +from typing import Optional +from langchain_core.documents.base import Document + + +RAG_PROMPT_TEMPLATE = ( + "Beantworte die Frage basierend auf den folgenden Fakten:" + "\n\n" + "{bulletpoints}" + "\n\n" + "Frage: {question}" +) + + +def assemble_prompt(question: str, context: list[Optional[Document]], template: str = RAG_PROMPT_TEMPLATE) -> str: + bulletpoints = "\n- ".join([c.page_content.replace("\n", "") for c in context]) + bulletpoints = f"- {bulletpoints}" + return template.format( + bulletpoints=bulletpoints, + question=question + ) +