Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Run Name to Customer Eval Results Table #222

Draft
wants to merge 1 commit into
base: customer-eval
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions journeys/evaluation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,6 +51,7 @@
"SEMANTIC_MODEL_STRING": "VARCHAR",
"EVAL_TABLE": "VARCHAR",
"EVAL_HASH": "VARCHAR",
"EVAL_RUN_NAME": "VARCHAR",
}

LLM_JUDGE_PROMPT_TEMPLATE = """\
Expand Down Expand Up @@ -289,6 +290,7 @@ def write_eval_results(frame: pd.DataFrame) -> None:
frame_to_write = frame.copy()
frame_to_write["TIMESTAMP"] = st.session_state["eval_timestamp"]
frame_to_write["EVAL_HASH"] = st.session_state["eval_hash"]
frame_to_write["EVAL_RUN_NAME"] = st.session_state["eval_run_name"]
frame_to_write["EVAL_TABLE"] = st.session_state["eval_table"]
frame_to_write["EVAL_TABLE_HASH"] = st.session_state["eval_table_hash"]
frame_to_write["MODEL_HASH"] = st.session_state["semantic_model_hash"]
Expand Down Expand Up @@ -612,6 +614,7 @@ def evaluation_mode_show() -> None:
st.write(
"Welcome!🧪 In the evaluation mode you can evaluate your semantic model using pairs of golden queries/questions and their expected SQL statements. These pairs should be captured in an **Evaluation Table**. Accuracy metrics will be shown and the results will be stored in an **Evaluation Results Table**."
)
st.text_input("Evaluation Run Name", key="eval_run_name")

# TODO: find a less awkward way of specifying this.
if any(key not in st.session_state for key in ("eval_table", "results_eval_table")):
Expand All @@ -637,6 +640,7 @@ def evaluation_mode_show() -> None:

evolution_run_summary = pd.DataFrame(
[
["Evaluation Run Name", st.session_state["eval_run_name"]],
["Evaluation Table Hash", st.session_state["eval_table_hash"]],
["Semantic Model Hash", st.session_state["semantic_model_hash"]],
["Evaluation Run Hash", st.session_state["eval_hash"]],
Expand Down