Merge pull request #12 from 20001LastOrder/main

Integrate Slack App with Pinecone
Aggregate-Intellect · Jun 26, 2023 · 87e89e6 · 87e89e6
2 parents a565dd5 + a019565
commit 87e89e6
Show file tree

Hide file tree

Showing 9 changed files with 2,210 additions and 13 deletions.
diff --git a/apps/.gitignore b/apps/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+**/.env
diff --git a/apps/slackbot/Dockerfile b/apps/slackbot/Dockerfile
@@ -21,4 +21,4 @@ COPY . .
 EXPOSE 80
 
 # Run the Flask app
-CMD ["python", "app.py"]
+CMD ["python", "bolt_app.py"]
diff --git a/apps/slackbot/README.md b/apps/slackbot/README.md
@@ -29,9 +29,12 @@ This repository contains a chatbot implementation using Flask and Slack. The cha
  . All these tokens should be added in .env file
 
     SLACK_SIGNING_SECRET: Slack apps signing secret.
-    SLACK_BOT_TOKEN: Slack bot token for authentication.
+    SLACK_OAUTH_TOKEN: Slack bot token for authentication.
     VERIFICATION_TOKEN: Slack verification token.
     OPENAI_API_KEY: OpenAI API key for language modeling.
+    PINECONE_INDEX: The Pinecone vector database index
+    PINECONE_API_KEY: The Pinecone vector database API key 
+    PINECONE_ENV: Region where the Pinecone index is deployed
 
     All these tokens should be added in .env file
 
@@ -47,6 +50,8 @@ This repository contains a chatbot implementation using Flask and Slack. The cha
     2.  Expose the server to the internet using a tool like ngrok. Not required if hosted on a public IP.
 
     3.  Set up the Slack app's Event Subscriptions and provide the ngrok URL as the Request URL.
+        * **NOTE:** When adding the URL to the Slack app, make sure to append `/slack/events` at the end, as this is the default path used by Slack Bolt.
+
 
     # Reference 
     

diff --git a/apps/slackbot/app.py b/apps/slackbot/app.py
@@ -28,6 +28,8 @@
 import atexit
 load_dotenv()
 
+from vectorstores import get_local_db
+
 
 # This `app` represents your existing Flask app
 app = Flask(__name__)
@@ -176,20 +178,12 @@ def createIndex(pdf_folder_path):
     global loaders
     global chain
     global index
-    loaders = [UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in os.listdir(pdf_folder_path)]
-    # loaders
-    documents = []
-    for loader in loaders:
-        documents.extend(loader.load())
-
-    index = VectorstoreIndexCreator(
-        embedding=OpenAIEmbeddings(openai_api_key=OPENAI_KEY),
-        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)).from_loaders(loaders)
+    retrival = get_local_db(pdf_folder_path, OPENAI_KEY)
 
     llm = OpenAI(model_name="gpt-3.5-turbo", openai_api_key=OPENAI_KEY)
     chain = RetrievalQA.from_chain_type(llm=llm,
                                         chain_type="stuff",
-                                        retriever=index.vectorstore.as_retriever(),
+                                        # The keyword must be `retriever`; `retrieve` is not a RetrievalQA field.
+                                        retriever=retrival,
                                         input_key="question")
 
     return chain

diff --git a/apps/slackbot/bolt_app.py b/apps/slackbot/bolt_app.py
@@ -0,0 +1,153 @@
+##############################################
+#  Implementation of the slack app using Bolt     
+#  Importing necessary modules
+##############################################
+
+import os
+from dotenv import load_dotenv
+load_dotenv()
+from langchain.chat_models import ChatOpenAI
+from langchain import LLMChain
+from langchain.chains.question_answering import load_qa_chain
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS 
+from langchain.llms import OpenAI
+from os import environ
+from vectorstores import ConversationStore
+from prompt import SlackBotPrompt
+from slack_bolt import App
+
+
+
+# Slack Bolt application object (not a Flask app). The command and event
+# listeners in this module are registered on it through its decorators.
+app = App(
+    token=os.environ.get("SLACK_OAUTH_TOKEN"),
+    signing_secret=os.environ.get("SLACK_SIGNING_SECRET"),
+)
+
+
+#####################################################################################################
+# Environment configuration:
+# Slack credentials and the OpenAI key are read from environment variables
+# (load_dotenv() is called at the top of this module, so a .env file is honoured).
+#####################################################################################################
+
+SLACK_SIGNING_SECRET = environ.get("SLACK_SIGNING_SECRET")
+SLACK_OAUTH_TOKEN = environ.get("SLACK_OAUTH_TOKEN")
+VERIFICATION_TOKEN = environ.get("VERIFICATION_TOKEN")
+# Accept either variable name: OPENAI_KEY is what this module originally read,
+# OPENAI_API_KEY is the name the environment variable is re-exported under below.
+OPENAI_KEY = environ.get("OPENAI_KEY") or environ.get("OPENAI_API_KEY")
+if not OPENAI_KEY:
+    # Fail with an actionable message instead of the opaque
+    # "TypeError: str expected, not NoneType" raised by os.environ[...] = None.
+    raise RuntimeError(
+        "OpenAI API key is not configured: set OPENAI_KEY or OPENAI_API_KEY "
+        "in the environment or in the .env file."
+    )
+
+
+###########################################################################
+# Re-export the OpenAI key under OPENAI_API_KEY
+# (presumably so libraries that read that variable directly can find it).
+###########################################################################
+
+os.environ['OPENAI_API_KEY'] = OPENAI_KEY
+
+@app.command("/hello-socket-mode")
+def hello_command(ack, body):
+    """Acknowledge the `/hello-socket-mode` slash command with a greeting.
+
+    The greeting mentions the invoking user, taken from ``body["user_id"]``.
+    """
+    ack(f"Hi, <@{body['user_id']}>!")
+
+# Call Slack's auth.test once at import time and keep the response;
+# get_response() reads bot['user_id'] from it to identify the bot's own user id.
+bot = app.client.auth_test()
+# Echo the auth.test response to stdout.
+print(bot)
+
+@app.event("app_mention")
+def event_test(client, say, event):
+    """Answer an `app_mention` event inside the thread it belongs to.
+
+    The thread's messages are fetched with ``conversations_replies``; all but
+    the last one are handed to ``get_response`` as history together with the
+    text of the mention, and the result is posted back to the same thread.
+    """
+    question = event['text']
+
+    # Reply in the existing thread, or start one rooted at the mention itself.
+    root_ts = event.get("thread_ts") or event["ts"]
+    thread = client.conversations_replies(channel=event['channel'], ts=root_ts)
+    # Drop the final message of the thread before using it as history.
+    history = thread['messages'][:-1]
+
+    say(get_response(question, history), thread_ts=root_ts)
+
+@app.event("app_home_opened")
+def update_home_tab(client, event, logger):
+    """Publish the static Home tab view for the user who opened the app home.
+
+    Any exception raised while publishing is logged through ``logger`` and
+    not re-raised.
+    """
+    welcome_section = {
+        "type": "section",
+        "text": {
+            "type": "mrkdwn",
+            "text": "*Welcome to your _App's Home_* :tada:",
+        },
+    }
+    hint_section = {
+        "type": "section",
+        "text": {
+            "type": "mrkdwn",
+            "text": "This button won't do much for now but you can set up a listener for it using the `actions()` method and passing its unique `action_id`. See an example in the `examples` folder within your Bolt app.",
+        },
+    }
+    button_row = {
+        "type": "actions",
+        "elements": [
+            {
+                "type": "button",
+                "text": {
+                    "type": "plain_text",
+                    "text": "Click me!",
+                },
+            },
+        ],
+    }
+    # The view object that appears in the app home.
+    home_view = {
+        "type": "home",
+        "callback_id": "home_view",
+        "blocks": [welcome_section, {"type": "divider"}, hint_section, button_row],
+    }
+
+    try:
+        # views.publish pushes the view to the Home tab of the given user.
+        client.views_publish(user_id=event["user"], view=home_view)
+    except Exception as e:
+        logger.error(f"Error publishing home tab: {e}")
+
+def get_response(question, previous_messages):
+    """Run the Slack bot prompt through a chat model and return the chain's output.
+
+    Args:
+        question: Text of the user's message.
+        previous_messages: Earlier messages of the thread, passed to the
+            prompt as ``messages``.
+
+    Returns:
+        The value returned by ``LLMChain.run`` for this query.
+    """
+    chat_model = ChatOpenAI(openai_api_key=OPENAI_KEY, request_timeout=120)
+
+    slack_prompt = SlackBotPrompt(
+        ai_name='Sherpa',
+        ai_id=bot['user_id'],
+        token_counter=chat_model.get_num_tokens,
+        input_variables=['query', 'messages', 'retriever'],
+    )
+
+    # Retriever over the 'ReadTheDocs' store in the index named by PINECONE_INDEX.
+    doc_retriever = ConversationStore.get_vector_retrieval(
+        'ReadTheDocs', OPENAI_KEY, index_name=os.getenv("PINECONE_INDEX")
+    )
+
+    chain_inputs = {
+        "query": question,
+        "messages": previous_messages,
+        "retriever": doc_retriever,
+    }
+    return LLMChain(llm=chat_model, prompt=slack_prompt).run(**chain_inputs)
+
+# Start Bolt's built-in HTTP server when this file is run as a script.
+# app.start() is called without arguments, so Bolt's default port (3000) is used.
+if __name__ == "__main__":
+    # Commented-out calls to helpers that are not defined in this module:
+    # documents = getDocuments('files')
+    # vectorstore = getVectoreStore(documents)
+    # qa = createLangchainQA(vectorstore)
+
+    # chain = createIndex("files")
+    print('Running the app')
+    app.start()
+    # Commented-out Socket Mode alternative to app.start():
+    # SocketModeHandler(app, os.environ["SLACK_APP_TOKEN"]).start()
diff --git a/apps/slackbot/prompt.py b/apps/slackbot/prompt.py
@@ -0,0 +1,104 @@
+from pydantic import BaseModel
+from langchain.prompts.chat import BaseChatPromptTemplate
+from typing import Callable, Any, List
+from langchain.schema import (
+    BaseMessage, 
+    HumanMessage, 
+    SystemMessage,
+    AIMessage
+)
+import time
+from langchain.vectorstores.base import VectorStoreRetriever
+
+
+
+class SlackBotPrompt(BaseChatPromptTemplate, BaseModel):
+    """Chat prompt template that assembles the messages sent to the LLM.
+
+    The resulting message list is: a system prompt naming the bot, a system
+    prompt with the current time, a system prompt with retrieved documents,
+    as much recent chat history as fits the token budget, and finally the
+    user's query with citation instructions.
+    """
+
+    # Name the bot is given in the system prompt and in rewritten mentions.
+    ai_name: str
+    # User id of the bot; messages from this user are treated as AI messages.
+    ai_id: str
+    # Callable returning the number of tokens in a string.
+    token_counter: Callable[[str], int]
+    # Overall token budget; chat history stops 1000 tokens short of it.
+    send_token_limit: int = 4196
+    # Token budget for the base prompts plus the retrieved documents.
+    document_token_limit: int = 2500
+
+    def construct_base_prompt(self):
+        """Return the system prompt text that introduces the bot by name."""
+        full_prompt = (
+            f"You are a friendly assistant bot called {self.ai_name}\n\n"
+        )
+
+        return full_prompt
+
+    def format_messages(self, **kwargs: Any) -> List[BaseMessage]:
+        """Build the list of chat messages for one query.
+
+        Args:
+            **kwargs: Must contain ``query`` (the user's message),
+                ``retriever`` (an object exposing ``get_relevant_documents``)
+                and ``messages`` (raw chat-history dicts, see
+                ``process_chat_history``).
+
+        Returns:
+            System prompts, the retained history messages in chronological
+            order, and the final human message.
+
+        Raises:
+            KeyError: If one of the required keyword arguments is missing.
+        """
+        base_prompt = SystemMessage(
+            content=self.construct_base_prompt()
+        )
+        time_prompt = SystemMessage(
+            content=f"The current time and date is {time.strftime('%c')}"
+        )
+        used_tokens = self.token_counter(base_prompt.content) + self.token_counter(
+            time_prompt.content
+        )
+
+        query = kwargs["query"]
+        retriever: VectorStoreRetriever = kwargs["retriever"]
+        previous_messages = self.process_chat_history(kwargs["messages"])
+
+        # Retrieve relevant documents for the query. A document without a
+        # "source" entry in its metadata gets an empty link instead of raising.
+        relevant_docs = retriever.get_relevant_documents(query)
+        relevant_memory = [
+            "Document: " + d.page_content + "\nLink: " + str(d.metadata.get("source", "")) + "\n"
+            for d in relevant_docs
+        ]
+
+        # Drop documents from the end until the budget is met. Token counts are
+        # computed once per document, and the loop stops when no documents are
+        # left, so it terminates even if the base prompts alone exceed the budget.
+        memory_token_counts = [self.token_counter(doc) for doc in relevant_memory]
+        relevant_memory_tokens = sum(memory_token_counts)
+        while relevant_memory and used_tokens + relevant_memory_tokens > self.document_token_limit:
+            relevant_memory.pop()
+            relevant_memory_tokens -= memory_token_counts.pop()
+
+        # Join the documents as plain text rather than interpolating the list,
+        # which would embed brackets, quotes and escaped newlines in the prompt.
+        documents_text = "\n".join(relevant_memory)
+        content_format = (
+            f"Here are some documents that may be relevant to the topic:"
+            f"\n{documents_text}\n\n"
+        )
+
+        input_message = (
+            f"Use the above information to respond to the user's message:\n{query}\n\n"
+            f"create inline citation by adding the source link of the reference document at the end of the sentence. "
+            f"Only use the link given in the reference document. DO NOT create link by yourself."
+        )
+
+        memory_message = SystemMessage(content=content_format)
+        used_tokens += self.token_counter(memory_message.content)
+
+        # Walk the last ten history messages from newest to oldest, keeping each
+        # one while it fits, and prepend so the result stays chronological.
+        historical_messages: List[BaseMessage] = []
+        for message in reversed(previous_messages[-10:]):
+            message_tokens = self.token_counter(message.content)
+            if used_tokens + message_tokens > self.send_token_limit - 1000:
+                break
+            historical_messages.insert(0, message)
+            used_tokens += message_tokens
+
+        messages: List[BaseMessage] = [base_prompt, time_prompt, memory_message]
+        messages += historical_messages
+        messages.append(HumanMessage(content=input_message))
+
+        return messages
+
+    def process_chat_history(self, messages: List[dict]) -> List[BaseMessage]:
+        """Convert raw chat-history dicts into LangChain messages.
+
+        Entries whose ``type`` is neither ``'message'`` nor ``'text'`` are
+        skipped. An entry whose ``user`` equals ``ai_id`` becomes an
+        ``AIMessage``; every other entry (including one without a ``user``
+        key) becomes a ``HumanMessage``. A missing ``text`` is treated as an
+        empty string.
+        """
+        results = []
+
+        for message in messages:
+            if message.get('type') not in ('message', 'text'):
+                continue
+
+            message_cls = AIMessage if message.get('user') == self.ai_id else HumanMessage
+            # Replace the at-mention of the bot's id with the name of the bot.
+            text = message.get('text', '').replace(f'@{self.ai_id}', f'@{self.ai_name}')
+            results.append(message_cls(content=text))
+
+        return results
+
diff --git a/apps/slackbot/requirements.txt b/apps/slackbot/requirements.txt
@@ -9,4 +9,6 @@ unstructured
 openai
 chromadb
 tiktoken
-Flask-Cors==3.0.10
+slack_bolt
+pinecone-client
+Flask-Cors==3.0.10