-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use an app factory pattern to enable app testing #495
Changes from all commits
7414c01
7b03c87
5fb6f1c
2d9b94a
19d1ee4
41636a9
0ac3830
3b19d4c
d8a113c
4c96807
a7a8f0d
2017c05
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,14 +14,23 @@ jobs: | |
fail-fast: false | ||
matrix: | ||
os: ["ubuntu-20.04"] | ||
python_version: ["3.8", "3.9", "3.10", "3.11"] | ||
python_version: ["3.9", "3.10", "3.11"] | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Setup python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: ${{ matrix.python_version }} | ||
architecture: x64 | ||
- name: Setup node | ||
uses: actions/setup-node@v2 | ||
with: | ||
node-version: 18 | ||
- name: Build frontend | ||
run: | | ||
cd ./app/frontend | ||
npm install | ||
npm run build | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This must be built since the Flask tests try to hit up index.html |
||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,85 +5,67 @@ | |
import time | ||
|
||
import openai | ||
from azure.identity import DefaultAzureCredential | ||
from azure.search.documents import SearchClient | ||
from azure.storage.blob import BlobServiceClient | ||
from flask import ( | ||
Blueprint, | ||
Flask, | ||
abort, | ||
current_app, | ||
jsonify, | ||
request, | ||
send_file, | ||
send_from_directory, | ||
) | ||
|
||
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach | ||
from approaches.readdecomposeask import ReadDecomposeAsk | ||
from approaches.readretrieveread import ReadRetrieveReadApproach | ||
from approaches.retrievethenread import RetrieveThenReadApproach | ||
from azure.identity import DefaultAzureCredential | ||
from azure.search.documents import SearchClient | ||
from azure.storage.blob import BlobServiceClient | ||
from flask import Flask, abort, jsonify, request, send_file | ||
|
||
# Replace these with your own values, either in environment variables or directly here | ||
AZURE_STORAGE_ACCOUNT = os.environ.get("AZURE_STORAGE_ACCOUNT") or "mystorageaccount" | ||
AZURE_STORAGE_CONTAINER = os.environ.get("AZURE_STORAGE_CONTAINER") or "content" | ||
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") or "gptkb" | ||
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") or "gptkbindex" | ||
AZURE_OPENAI_SERVICE = os.environ.get("AZURE_OPENAI_SERVICE") or "myopenai" | ||
AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT_DEPLOYMENT") or "davinci" | ||
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "chat" | ||
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL") or "gpt-35-turbo" | ||
AZURE_OPENAI_EMB_DEPLOYMENT = os.environ.get("AZURE_OPENAI_EMB_DEPLOYMENT") or "embedding" | ||
|
||
KB_FIELDS_CONTENT = os.environ.get("KB_FIELDS_CONTENT") or "content" | ||
KB_FIELDS_CATEGORY = os.environ.get("KB_FIELDS_CATEGORY") or "category" | ||
KB_FIELDS_SOURCEPAGE = os.environ.get("KB_FIELDS_SOURCEPAGE") or "sourcepage" | ||
|
||
# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed, | ||
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the | ||
# keys for each service | ||
# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True) | ||
azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True) | ||
|
||
# Used by the OpenAI SDK | ||
openai.api_type = "azure" | ||
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com" | ||
openai.api_version = "2023-05-15" | ||
|
||
# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead | ||
openai.api_type = "azure_ad" | ||
openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default") | ||
openai.api_key = openai_token.token | ||
|
||
# Set up clients for Cognitive Search and Storage | ||
search_client = SearchClient( | ||
endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net", | ||
index_name=AZURE_SEARCH_INDEX, | ||
credential=azure_credential) | ||
blob_client = BlobServiceClient( | ||
account_url=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net", | ||
credential=azure_credential) | ||
blob_container = blob_client.get_container_client(AZURE_STORAGE_CONTAINER) | ||
|
||
# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns | ||
# or some derivative, here we include several for exploration purposes | ||
ask_approaches = { | ||
"rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT), | ||
"rrr": ReadRetrieveReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT), | ||
"rda": ReadDecomposeAsk(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT) | ||
} | ||
|
||
chat_approaches = { | ||
"rrr": ChatReadRetrieveReadApproach(search_client, | ||
AZURE_OPENAI_CHATGPT_DEPLOYMENT, | ||
AZURE_OPENAI_CHATGPT_MODEL, | ||
AZURE_OPENAI_EMB_DEPLOYMENT, | ||
KB_FIELDS_SOURCEPAGE, | ||
KB_FIELDS_CONTENT) | ||
} | ||
|
||
app = Flask(__name__) | ||
|
||
@app.route("/", defaults={"path": "index.html"}) | ||
@app.route("/<path:path>") | ||
def static_file(path): | ||
return app.send_static_file(path) | ||
AZURE_STORAGE_ACCOUNT = os.getenv("AZURE_STORAGE_ACCOUNT", "mystorageaccount") | ||
AZURE_STORAGE_CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER", "content") | ||
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "gptkb") | ||
AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX", "gptkbindex") | ||
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE", "myopenai") | ||
AZURE_OPENAI_GPT_DEPLOYMENT = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT", "davinci") | ||
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "chat") | ||
AZURE_OPENAI_CHATGPT_MODEL = os.getenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo") | ||
AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT", "embedding") | ||
|
||
KB_FIELDS_CONTENT = os.getenv("KB_FIELDS_CONTENT", "content") | ||
KB_FIELDS_CATEGORY = os.getenv("KB_FIELDS_CATEGORY", "category") | ||
KB_FIELDS_SOURCEPAGE = os.getenv("KB_FIELDS_SOURCEPAGE", "sourcepage") | ||
|
||
CONFIG_OPENAI_TOKEN = "openai_token" | ||
CONFIG_CREDENTIAL = "azure_credential" | ||
CONFIG_ASK_APPROACHES = "ask_approaches" | ||
CONFIG_CHAT_APPROACHES = "chat_approaches" | ||
CONFIG_BLOB_CLIENT = "blob_client" | ||
|
||
|
||
bp = Blueprint("routes", __name__, static_folder='static') | ||
|
||
@bp.route("/") | ||
def index(): | ||
return bp.send_static_file("index.html") | ||
|
||
@bp.route("/favicon.ico") | ||
def favicon(): | ||
return bp.send_static_file("favicon.ico") | ||
|
||
@bp.route("/assets/<path:path>") | ||
def assets(path): | ||
return send_from_directory("static/assets", path) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a security upgrade over the previous route that used send_static_file for arbitrary paths. |
||
|
||
# Serve content files from blob storage from within the app to keep the example self-contained. | ||
# *** NOTE *** this assumes that the content files are public, or at least that all users of the app | ||
# can access all the files. This is also slow and memory hungry. | ||
@app.route("/content/<path>") | ||
@bp.route("/content/<path>") | ||
def content_file(path): | ||
blob_container = current_app.config[CONFIG_BLOB_CLIENT].get_container_client(AZURE_STORAGE_CONTAINER) | ||
blob = blob_container.get_blob_client(path).download_blob() | ||
if not blob.properties or not blob.properties.has_key("content_settings"): | ||
abort(404) | ||
|
@@ -95,13 +77,13 @@ def content_file(path): | |
blob_file.seek(0) | ||
return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path) | ||
|
||
@app.route("/ask", methods=["POST"]) | ||
@bp.route("/ask", methods=["POST"]) | ||
def ask(): | ||
if not request.json: | ||
return jsonify({"error": "request must be json"}), 400 | ||
if not request.is_json: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was a bug, caught by the new tests. Calling request.json causes an immediate 415 HTTP error if there's no JSON passed in, thus it's now request.is_json. |
||
return jsonify({"error": "request must be json"}), 415 | ||
approach = request.json["approach"] | ||
try: | ||
impl = ask_approaches.get(approach) | ||
impl = current_app.config[CONFIG_ASK_APPROACHES].get(approach) | ||
if not impl: | ||
return jsonify({"error": "unknown approach"}), 400 | ||
r = impl.run(request.json["question"], request.json.get("overrides") or {}) | ||
|
@@ -110,13 +92,13 @@ def ask(): | |
logging.exception("Exception in /ask") | ||
return jsonify({"error": str(e)}), 500 | ||
|
||
@app.route("/chat", methods=["POST"]) | ||
@bp.route("/chat", methods=["POST"]) | ||
def chat(): | ||
if not request.json: | ||
return jsonify({"error": "request must be json"}), 400 | ||
if not request.is_json: | ||
return jsonify({"error": "request must be json"}), 415 | ||
approach = request.json["approach"] | ||
try: | ||
impl = chat_approaches.get(approach) | ||
impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach) | ||
if not impl: | ||
return jsonify({"error": "unknown approach"}), 400 | ||
r = impl.run(request.json["history"], request.json.get("overrides") or {}) | ||
|
@@ -125,12 +107,89 @@ def chat(): | |
logging.exception("Exception in /chat") | ||
return jsonify({"error": str(e)}), 500 | ||
|
||
@app.before_request | ||
@bp.before_request | ||
def ensure_openai_token(): | ||
global openai_token | ||
openai_token = current_app.config[CONFIG_OPENAI_TOKEN] | ||
if openai_token.expires_on < time.time() + 60: | ||
openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default") | ||
openai_token = current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default") | ||
current_app.config[CONFIG_OPENAI_TOKEN] = openai_token | ||
openai.api_key = openai_token.token | ||
|
||
|
||
def create_app(): | ||
app = Flask(__name__) | ||
|
||
# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed, | ||
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the | ||
# keys for each service | ||
# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True) | ||
azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True) | ||
|
||
# Set up clients for Cognitive Search and Storage | ||
search_client = SearchClient( | ||
endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net", | ||
index_name=AZURE_SEARCH_INDEX, | ||
credential=azure_credential) | ||
blob_client = BlobServiceClient( | ||
account_url=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net", | ||
credential=azure_credential) | ||
|
||
# Used by the OpenAI SDK | ||
openai.api_type = "azure" | ||
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com" | ||
openai.api_version = "2023-05-15" | ||
|
||
# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead | ||
openai.api_type = "azure_ad" | ||
openai_token = azure_credential.get_token( | ||
"https://cognitiveservices.azure.com/.default" | ||
) | ||
openai.api_key = openai_token.token | ||
|
||
# Store on app.config for later use inside requests | ||
app.config[CONFIG_OPENAI_TOKEN] = openai_token | ||
app.config[CONFIG_CREDENTIAL] = azure_credential | ||
app.config[CONFIG_BLOB_CLIENT] = blob_client | ||
# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns | ||
# or some derivative, here we include several for exploration purposes | ||
app.config[CONFIG_ASK_APPROACHES] = { | ||
"rtr": RetrieveThenReadApproach( | ||
search_client, | ||
AZURE_OPENAI_CHATGPT_DEPLOYMENT, | ||
AZURE_OPENAI_CHATGPT_MODEL, | ||
AZURE_OPENAI_EMB_DEPLOYMENT, | ||
KB_FIELDS_SOURCEPAGE, | ||
KB_FIELDS_CONTENT | ||
), | ||
"rrr": ReadRetrieveReadApproach( | ||
search_client, | ||
AZURE_OPENAI_GPT_DEPLOYMENT, | ||
AZURE_OPENAI_EMB_DEPLOYMENT, | ||
KB_FIELDS_SOURCEPAGE, | ||
KB_FIELDS_CONTENT | ||
), | ||
"rda": ReadDecomposeAsk(search_client, | ||
AZURE_OPENAI_GPT_DEPLOYMENT, | ||
AZURE_OPENAI_EMB_DEPLOYMENT, | ||
KB_FIELDS_SOURCEPAGE, | ||
KB_FIELDS_CONTENT | ||
) | ||
} | ||
app.config[CONFIG_CHAT_APPROACHES] = { | ||
"rrr": ChatReadRetrieveReadApproach( | ||
search_client, | ||
AZURE_OPENAI_CHATGPT_DEPLOYMENT, | ||
AZURE_OPENAI_CHATGPT_MODEL, | ||
AZURE_OPENAI_EMB_DEPLOYMENT, | ||
KB_FIELDS_SOURCEPAGE, | ||
KB_FIELDS_CONTENT, | ||
) | ||
} | ||
|
||
app.register_blueprint(bp) | ||
|
||
return app | ||
|
||
if __name__ == "__main__": | ||
app = create_app() | ||
app.run() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ | |
workers = (num_cpus * 2) + 1 | ||
threads = 1 if num_cpus == 1 else 2 | ||
timeout = 600 | ||
worker_class = "gthread" |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing 3.8 since we don't currently support it (due to our typing syntax). We can add it back if it's an issue for customers.