Skip to content

Commit

Permalink
Update the deployment with correct model names, requirements and base…
Browse files Browse the repository at this point in the history
… image (#39)
  • Loading branch information
hemajv authored Aug 9, 2024
1 parent 485f404 commit 3e7cdb0
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 7 deletions.
File renamed without changes.
21 changes: 17 additions & 4 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ def OPENAI_API_KEY() -> str:
model_id = st.selectbox(
label="Model",
options=[
"ibm/granite-20b-code-instruct-v1",
"ibm/granite-20b-code-instruct",
"codellama/codellama-34b-instruct",
"meta-llama/llama-2-13b",
"ibm/granite-3b-code-plus-v1",
"meta-llama/llama-2-13b-chat",
"ibm/granite-3b-code-instruct",
"meta-llama/llama-2-70b",
"mistralai/mistral-7b-v0-1",
"OpenAI/gpt3.5",
"bigcode/starcoder",
"tiiuae/falcon-180b",
Expand Down Expand Up @@ -251,9 +252,21 @@ def main(prompt_success: bool, prompt_diff: int, actual_doc: str):
with col3:
st.subheader("Evaluation Metrics")

# ROUGE scores
st.markdown(
"**Automatic evaluation using ROUGE metrics:**",
help="Using predefined metrics to evaluate the responses generated by the LLM"
)

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores = scorer.score(actual_doc, result)
st.markdown(f"ROUGE-1 Score: {rouge_scores['rouge1'].fmeasure:.2f}", help="ROUGE-1 refers to the overlap of unigrams (each word) between the system and reference summaries")
st.markdown(f"ROUGE-2 Score: {rouge_scores['rouge2'].fmeasure:.2f}", help="ROUGE-2 refers to the overlap of bigrams between the system and reference summaries")
st.markdown(f"ROUGE-L Score: {rouge_scores['rougeL'].fmeasure:.2f}", help="Longest common subsequence problem takes into account sentence-level structure similarity naturally and identifies longest co-occurring in sequence n-grams automatically")

st.markdown(
"**LLM based evaluation on logic, correctness and helpfulness:**",
help="Use Langchain Criteria based Eval to evaluate on cutsom criteria (this list can be updated based on what we are looking to see from the generated docs). Note this is language mo0del based evaluation and not always a true indication of the quality of the output that is generatged."
help="Use Langchain Criteria based Eval to evaluate on custom criteria (this list can be updated based on what we are looking to see from the generated docs). Note this is language model based evaluation and not always a true indication of the quality of the output that is generated."
)

lc_score = eval_using_langchain(result, prompt, actual_doc)
Expand Down
5 changes: 3 additions & 2 deletions app/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
html2text
# new ibm version breaks existing code,
# use this for the time being
ibm-generative-ai>=2.0.0
ibm-generative-ai==2.0.0
python-dotenv
streamlit-toggle-switch
streamlit
Expand All @@ -11,4 +11,5 @@ openai
textstat
scikit-learn
streamlit-feedback
langchain
langchain
langchain-community
2 changes: 1 addition & 1 deletion app/resources/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ spec:
claimName: user-feedback-db
containers:
- name: api-doc-container
image: docker.io/ochatterjee/api-docs-gen:v0.1.0
image: docker.io/hemaveeradhi/api-docs-gen:v0.2.0
imagePullPolicy: Always
env:
- name: GENAI_KEY
Expand Down

0 comments on commit 3e7cdb0

Please sign in to comment.