Skip to content

Commit

Permalink
Excluding RAG apps from ruff check (#405)
Browse files Browse the repository at this point in the history
Excluding RAG apps from ruff check

Co-authored-by: dristy.cd <dristy@clouddefense.io>
  • Loading branch information
dristysrivastava and dristy.cd authored Jul 12, 2024
1 parent e7fee44 commit 14d7421
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 52 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ MYPY_CACHE=.mypy_cache
lint:
ruff check .
ruff format . --diff
ruff --select I .
ruff check --select I .
mkdir -p $(MYPY_CACHE) && mypy --install-types --non-interactive $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) --exclude build/ --exclude pebblo_saferetriever --check-untyped-defs || true

spell_check:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from typing import Optional
import os
import requests
from typing import Optional

import requests
from dotenv import load_dotenv

load_dotenv()


class SharepointADHelper:
def __init__(
self,
client_id: Optional[str] = None,
client_secret: Optional[str] = None,
tenant_id: Optional[str] = None
tenant_id: Optional[str] = None,
):
self.client_id = client_id or os.environ.get("O365_CLIENT_ID")
self.client_secret = client_secret or os.environ.get("O365_CLIENT_SECRET")
Expand All @@ -21,9 +23,11 @@ def __init__(
)
self.access_token = self.get_access_token()
if not self.access_token:
raise EnvironmentError("o365 client id/secret or tenant id is invalid."
"Please check the environment variables.")
self.headers = { 'Authorization': 'Bearer' + self.access_token }
raise EnvironmentError(
"o365 client id/secret or tenant id is invalid."
"Please check the environment variables."
)
self.headers = {"Authorization": "Bearer" + self.access_token}

def get_authorized_identities(self, user_id: str):
"""
Expand All @@ -38,11 +42,15 @@ def get_authorized_identities(self, user_id: str):
user = self._get_users(user_id)
user_index_id = user.get("id")
if not user_index_id:
print(f"Could not find the user `{user_id}` information in Microsoft Graph API. Not authorized.")
print(
f"Could not find the user `{user_id}` information in Microsoft Graph API. Not authorized."
)
return [user_id]
associated_groups = self._get_associated_groups(user_index_id)
associated_groups_emails = [
group.get("mail") for group in associated_groups["value"] if group.get("mail")
group.get("mail")
for group in associated_groups["value"]
if group.get("mail")
]
return associated_groups_emails + [user_id]

Expand Down Expand Up @@ -92,7 +100,6 @@ def _get_users(self, user_id: str):
else:
return response.json()


def get_access_token(self):
"""
Retrieves an access token from Microsoft Graph API using client credentials.
Expand All @@ -102,14 +109,14 @@ def get_access_token(self):
requests.exceptions.HTTPError: If the request to retrieve the access token fails.
"""
# ToDo: This access token should be cached and refreshed when it expires
# It should also be stored in home directory or in a secure location
# It should also be stored in home directory or in a secure location
url = f"https://login.microsoftonline.com/{self.tenant_id}/oauth2/v2.0/token"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
data = {
"grant_type": "client_credentials",
"client_id": self.client_id,
"client_secret": self.client_secret,
"scope": "https://graph.microsoft.com/.default"
"scope": "https://graph.microsoft.com/.default",
}
try:
response = requests.post(url, headers=headers, data=data, timeout=10)
Expand All @@ -120,5 +127,6 @@ def get_access_token(self):
else:
return response.json()["access_token"]


if __name__ == "__main__":
pass
pass
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
from dotenv import load_dotenv
load_dotenv()

import asyncio
import os
from typing import Optional

from msgraph import GraphServiceClient
from azure.identity import ClientSecretCredential
from dotenv import load_dotenv
from kiota_abstractions.api_error import APIError
from msgraph import GraphServiceClient

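load_dotenv()  # NOTE: when running the RAG app, move this call back to line 2, directly after `from dotenv import load_dotenv` on line 1, so it runs before the remaining imports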


async def get_authorized_identities(
user_id: str,
client_id: Optional[str] = None,
client_secret: Optional[str] = None,
tenant_id: Optional[str] = None
tenant_id: Optional[str] = None,
):
client_id = client_id or os.environ.get("O365_CLIENT_ID")
client_secret = client_secret or os.environ.get("O365_CLIENT_SECRET")
Expand Down Expand Up @@ -42,6 +43,6 @@ async def get_authorized_identities(
] + [user_id]
return auth_iden


if __name__ == "__main__":
print(asyncio.run(get_authorized_identities("arpit@daxaai.onmicrosoft.com")))

Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
# Fill-in OPENAI_API_KEY in .env file in this directory before proceeding
import os

from dotenv import load_dotenv
load_dotenv()

import asyncio
import os
from msgraph_api_auth import SharepointADHelper
from langchain_community.chains import PebbloRetrievalQA
from langchain_community.chains.pebblo_retrieval.models import (
AuthContext,
ChainInput,
)
from langchain_community.document_loaders import UnstructuredFileIOLoader
from langchain_community.document_loaders import SharePointLoader
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
from langchain_community.vectorstores.qdrant import Qdrant
from langchain_community.document_loaders import SharePointLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.llms import OpenAI


from msgraph_api_auth import SharepointADHelper


class PebbloIdentityRAG:
Expand Down Expand Up @@ -98,25 +95,34 @@ def ask(self, question: str, user_email: str, auth_identifiers: list):

print("Please enter ingestion user details for loading data...")
app_client_id = input("App client id : ") or os.environ.get("O365_CLIENT_ID")
app_client_secret = input("App client secret : ") or os.environ.get("O365_CLIENT_SECRET")
app_client_secret = input("App client secret : ") or os.environ.get(
"O365_CLIENT_SECRET"
)
tenant_id = input("Tenant id : ") or os.environ.get("O365_TENANT_ID")

drive_id = input("Drive id : ") or "b!TVvGZhXfGUuSKMdCgOucz08XRKxsDuVCojWCjzBMN-as9t-EstljQKBl332OMJnI"

drive_id = (
input("Drive id : ")
or "b!TVvGZhXfGUuSKMdCgOucz08XRKxsDuVCojWCjzBMN-as9t-EstljQKBl332OMJnI"
)

rag_app = PebbloIdentityRAG(
drive_id = drive_id, folder_path = "/document", collection_name=input_collection_name
drive_id=drive_id,
folder_path="/document",
collection_name=input_collection_name,
)

while True:
print("Please enter end user details below")
end_user_email_address = input("User email address : ") or "arpit@daxaai.onmicrosoft.com"
end_user_email_address = (
input("User email address : ") or "arpit@daxaai.onmicrosoft.com"
)
prompt = input("Please provide the prompt : ") or "tell me about sample.pdf."
print(f"User: {end_user_email_address}.\nQuery:{prompt}\n")
authorized_identities = SharepointADHelper(
client_id = app_client_id,
client_secret = app_client_secret,
tenant_id = tenant_id,
).get_authorized_identities(end_user_email_address)
client_id=app_client_id,
client_secret=app_client_secret,
tenant_id=tenant_id,
).get_authorized_identities(end_user_email_address)
response = rag_app.ask(prompt, end_user_email_address, authorized_identities)
print(f"Response:\n{response}")
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
# Fill-in OPENAI_API_KEY in .env file in this directory before proceeding
import asyncio
import os

from dotenv import load_dotenv
load_dotenv()

import asyncio
import os
from msgraph_sdk_auth import get_authorized_identities
from langchain_community.chains import PebbloRetrievalQA
from langchain_community.chains.pebblo_retrieval.models import (
AuthContext,
ChainInput,
)
from langchain_community.document_loaders import UnstructuredFileIOLoader
from langchain_community.document_loaders import SharePointLoader
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
from langchain_community.vectorstores.qdrant import Qdrant
from langchain_community.document_loaders import SharePointLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.llms import OpenAI


from msgraph_sdk_auth import get_authorized_identities


class PebbloIdentityRAG:
Expand Down Expand Up @@ -98,27 +96,36 @@ def ask(self, question: str, user_email: str, auth_identifiers: list):

print("Please enter ingestion user details for loading data...")
app_client_id = input("App client id : ") or os.environ.get("O365_CLIENT_ID")
app_client_secret = input("App client secret : ") or os.environ.get("O365_CLIENT_SECRET")
app_client_secret = input("App client secret : ") or os.environ.get(
"O365_CLIENT_SECRET"
)
tenant_id = input("Tenant id : ") or os.environ.get("O365_TENANT_ID")

drive_id = input("Drive id : ") or "b!TVvGZhXfGUuSKMdCgOucz08XRKxsDuVCojWCjzBMN-as9t-EstljQKBl332OMJnI"

drive_id = (
input("Drive id : ")
or "b!TVvGZhXfGUuSKMdCgOucz08XRKxsDuVCojWCjzBMN-as9t-EstljQKBl332OMJnI"
)

rag_app = PebbloIdentityRAG(
drive_id = drive_id, folder_path = "/document", collection_name=input_collection_name
drive_id=drive_id,
folder_path="/document",
collection_name=input_collection_name,
)
loop = asyncio.get_event_loop()

while True:
print("Please enter end user details below")
end_user_email_address = input("User email address : ") or "arpit@daxaai.onmicrosoft.com"
end_user_email_address = (
input("User email address : ") or "arpit@daxaai.onmicrosoft.com"
)
prompt = input("Please provide the prompt : ") or "tell me about sample.pdf."
print(f"User: {end_user_email_address}.\nQuery:{prompt}\n")
authorized_identities = loop.run_until_complete(
get_authorized_identities(
user_id=end_user_email_address,
client_id = app_client_id,
client_secret = app_client_secret,
tenant_id = tenant_id,
client_id=app_client_id,
client_secret=app_client_secret,
tenant_id=tenant_id,
)
)
response = rag_app.ask(prompt, end_user_email_address, authorized_identities)
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ include-package-data = false
[tool.setuptools.packages.find]
exclude=["tests*", "pebblo_safeloader*", "pebblo_saferetriever*", "pebblo_cloud*"]

[tool.ruff]
exclude = ["pebblo_saferetriever/langchain/identity-rag/sharepoint/*.py"]

[build-system]
# These are the assumed default build requirements from pip:
# https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support
Expand Down

0 comments on commit 14d7421

Please sign in to comment.