Sample update mslearn (#138)

* update promptflow-eval dependencies to azure-ai-evaluation
* clear local variables
* fix errors and remove 'question' col from data
* small fix in evaluator config
* Restructuring evaluation samples for consistency and clarity
* remove key
* disable pre-commit errors
* address some comments
* format and linter
* bring modelendpoints into notebook
* pre-commit fixes
* index sample update
* add more advanced use of simulator
* fix env vars
* fix user override prompty
Azure-Samples · Oct 17, 2024 · 933d29e · 933d29e
1 parent 2c25877
commit 933d29e
Show file tree

Hide file tree

Showing 35 changed files with 1,619 additions and 999 deletions.
diff --git a/scenarios/evaluate-model-endpoints/app_target.py b/scenarios/evaluate-model-endpoints/app_target.py
diff --git a/scenarios/evaluate-model-endpoints/data.jsonl b/scenarios/evaluate-model-endpoints/data.jsonl
diff --git a/scenarios/evaluate-app-endpoint/README.md → scenarios/evaluate/evaluate_app/README.md b/scenarios/evaluate-app-endpoint/README.md → scenarios/evaluate/evaluate_app/README.md
diff --git a/scenarios/evaluate-app-endpoint/askwiki.py → scenarios/evaluate/evaluate_app/askwiki.py b/scenarios/evaluate-app-endpoint/askwiki.py → scenarios/evaluate/evaluate_app/askwiki.py
diff --git a/scenarios/evaluate-app-endpoint/data.jsonl → scenarios/evaluate/evaluate_app/data.jsonl b/scenarios/evaluate-app-endpoint/data.jsonl → scenarios/evaluate/evaluate_app/data.jsonl
diff --git a/...aluate-app-endpoint/evaluate-target.ipynb → .../evaluate/evaluate_app/evaluate_app.ipynb b/...aluate-app-endpoint/evaluate-target.ipynb → .../evaluate/evaluate_app/evaluate_app.ipynb
@@ -5,7 +5,18 @@
    "id": "2e932e4c-5d55-461e-a313-3a087d8983b5",
    "metadata": {},
    "source": [
-    "# Standard evaluators and target functions.\n"
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Evaluate app using Azure AI Evaluation APIs\n"
    ]
   },
   {
@@ -14,7 +25,7 @@
    "metadata": {},
    "source": [
     "## Objective\n",
-    "In this notebook we will demonstrate how to use the target functions with the standard evaluators.\n",
+    "In this notebook we will demonstrate how to use the target functions with the standard evaluators to evaluate an app.\n",
     "\n",
     "This tutorial provides a step-by-step guide on how to evaluate a function\n",
     "\n",

diff --git a/...aluate-app-endpoint/system-message.jinja2 → ...aluate/evaluate_app/system-message.jinja2 b/...aluate-app-endpoint/system-message.jinja2 → ...aluate/evaluate_app/system-message.jinja2
diff --git a/...rsarial-interactions/askwiki/blocklist.py → ...ios/evaluate/evaluate_custom/blocklist.py b/...rsarial-interactions/askwiki/blocklist.py → ...ios/evaluate/evaluate_custom/blocklist.py
@@ -1,13 +1,12 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import List, Dict
 
 
 class BlocklistEvaluator:
-    def __init__(self: "BlocklistEvaluator", blocklist: List[str]) -> None:
+    def __init__(self, blocklist) -> None:  # noqa: ANN101, ANN001
         self._blocklist = blocklist
 
-    def __call__(self: "BlocklistEvaluator", *, answer: str) -> Dict[str, bool]:
-        score = any(word in answer for word in self._blocklist)
+    def __call__(self: "BlocklistEvaluator", *, response: str):  # noqa: ANN204
+        score = any(word in response for word in self._blocklist)
         return {"score": score}
diff --git a/scenarios/evaluate/evaluate_custom/data.jsonl b/scenarios/evaluate/evaluate_custom/data.jsonl
@@ -0,0 +1,3 @@
+{"query":"When was United Stated found ?", "response":"1776"}
+{"query":"What is the capital of France?", "response":"Paris"}
+{"query":"Who is the best tennis player of all time ?", "response":"Roger Federer"}