ai-cfia · MaxenceGui · Apr 16, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 17, 2024
diff --git a/.env.template b/.env.template
@@ -2,3 +2,8 @@ BING_SEARCH_KEY =
 BING_ENDPOINT =
 CACHE_PATH =
 OUTPUT_FOLDER =
+
+AZURE_STORAGE_CONNECTION_STRING=
+SEEDS_NAME=
+TESTING_FOLDERS=
+NACHET_BACKEND_URL=
diff --git a/nachet/datastore.py b/nachet/datastore.py
@@ -0,0 +1,79 @@
+from concurrent.futures import ThreadPoolExecutor
+
+from azure.storage.blob import BlobServiceClient, ContainerClient
+
+
+class DatastoreErrors(Exception):
+    """Error raised by the datastore helpers of this module."""
+
+
+def get_blob_client(connection: str) -> BlobServiceClient:
+    """
+    Build a BlobServiceClient from an Azure Storage connection string.
+
+    Args:
+        connection (str): The connection string of the Azure Blob Storage
+        account.
+
+    Returns:
+        BlobServiceClient: The client created from the connection string.
+
+    Raises:
+        DatastoreErrors: If creating the client raises a ValueError.
+    """
+    try:
+        client = BlobServiceClient.from_connection_string(connection)
+    except ValueError as error:
+        # Keep the original ValueError attached as the cause.
+        raise DatastoreErrors("could not retrieve the blob client") from error
+    return client
+
+
+def get_testing_image(blob_path: str,
+    blob_service_client: BlobServiceClient,
+    seed_name: str, key_word: str = "testing") -> list[bytes]:
+    """
+    Download the testing images of one seed from Azure Blob Storage.
+
+    Every container whose name starts with ``blob_path`` is scanned, and each
+    blob whose name contains both ``seed_name`` and ``key_word`` is downloaded.
+
+    Args:
+        blob_path (str): The prefix of the container names to look into.
+        blob_service_client (BlobServiceClient): The BlobServiceClient object.
+        seed_name (str): The seed name that must appear in the blob name.
+        key_word (str, optional): The keyword that must also appear in the
+        blob name. Defaults to "testing".
+
+    Returns:
+        list[bytes]: The content of every matching blob, container after
+        container, in the order the containers were listed.
+    """
+
+    def download_matching_blobs(container: ContainerClient) -> list[bytes]:
+        """
+        Download the matching blobs of one container.
+
+        Args:
+            container (ContainerClient): The ContainerClient object.
+
+        Returns:
+            list[bytes]: The content of each blob whose name contains both
+            the seed name and the keyword.
+        """
+        return [
+            container.get_blob_client(blob.name).download_blob().readall()
+            for blob in container.list_blobs()
+            if seed_name in blob.name and key_word in blob.name
+        ]
+
+    containers = [
+        blob_service_client.get_container_client(found.name)
+        for found in blob_service_client.list_containers(name_starts_with=blob_path)
+    ]
+
+    # One task per container; executor.map keeps the containers' order.
+    with ThreadPoolExecutor() as executor:
+        # Flatten in a single pass: sum(..., []) would copy the accumulated
+        # list again for every container.
+        return [
+            image
+            for images in executor.map(download_matching_blobs, containers)
+            for image in images
+        ]
+
+
+def get_user_image(blob_path: list[str],
+    blob_service_client: BlobServiceClient,
+    seed_name: list[str], key_word: str = "user") -> list[str]:
+    """Placeholder for retrieving user images: not implemented, returns None."""
+    return None
+
+def get_image_from_folder(blob_path: str) -> list[str]:
+    """Placeholder for reading images of a folder: not implemented, returns None."""
+    return None
diff --git a/nachet/inference_testing.py b/nachet/inference_testing.py
@@ -0,0 +1,72 @@
+import base64
+import requests
+import time
+
+
+def start_testing(amount: int, data: list, backend_url: str, models: list[str]) -> dict:
+    """
+    Send testing images to the backend inference endpoint for every model.
+
+    Args:
+        amount (int): The maximum number of images taken from ``data``.
+        data (list): The content (bytes) of the images to test with.
+        backend_url (str): The URL of the backend.
+        models (list[str]): The names of the models to test.
+
+    Returns:
+        dict: For each model, a dictionary keyed by ``imageNN`` holding the
+        predicted labels, the flat list of topN labels, the number of seeds
+        reported by the backend and the request time in seconds.
+    """
+
+    results = {model: {} for model in models}
+
+    # The headers are the same for every request, so build them once.
+    headers = {
+        "Content-Type": "application/json",
+        "Access-Control-Allow-Origin": "*",
+    }
+
+    for index, blob in enumerate(data[:amount], start=1):
+        key = f"image{index:02d}"
+        image = "data:image/PNG;base64," + base64.b64encode(blob).decode("utf8")
+
+        for model in models:
+            payload = {
+                "model_name": model,
+                "validator": "nachet_testing_image",
+                "folder_name": "api_test_nachet",
+                "container_name": "testing-images",
+                "imageDims": [100, 100],
+                "image": image,
+            }
+
+            # The measured time covers the request and the JSON decoding.
+            start = time.perf_counter()
+            response = requests.post(
+                backend_url + "/inf", json=payload, headers=headers).json()
+            elapsed = time.perf_counter() - start
+
+            boxes = response[0]["boxes"]
+
+            print(key)
+            print(f"number of seed detecte: {len(boxes)}")
+
+            top_n = []
+
+            # An image with no detection has no box to inspect; indexing
+            # boxes[0] unconditionally would raise IndexError.
+            if boxes and boxes[0].get("topN"):
+                for box in boxes:
+                    top_n.extend(score.get("label") for score in box.get("topN"))
+
+            results[model][key] = {
+                "labels": [box.get("label") for box in boxes],
+                "topN": top_n,
+                "nb_seeds": response[0].get("totalBoxes"),
+                "request_time": elapsed,
+            }
+
+    return results
+
+def test_inference(image: str, backend_url: str) -> dict:
+    """Placeholder for testing a single image: not implemented, returns None."""
+    return None
diff --git a/nachet/nachet_test.py b/nachet/nachet_test.py
@@ -0,0 +1,42 @@
+
+import os
+import requests
+
+from dotenv import load_dotenv
+
+from datastore import get_blob_client
+from nachet_ui import actions
+
+load_dotenv()
+
+# Configuration read from the environment; a missing variable is left as None.
+AZURE_STORAGE_CONNECTION_STRING = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
+SEEDS_NAME = os.environ.get("SEEDS_NAME")
+TESTING_FOLDERS = os.environ.get("TESTING_FOLDERS")
+NACHET_BACKEND_URL = os.environ.get("NACHET_BACKEND_URL")
+
+
+def format_list_env():
+    """
+    Split the SEEDS_NAME and TESTING_FOLDERS environment values into lists.
+
+    Both values are comma-separated strings. Surrounding whitespace is removed
+    from every entry and blank entries are dropped. An unset variable yields
+    an empty list instead of failing with an AttributeError on ``None``.
+
+    Returns:
+        tuple[list[str], list[str]]: The seed names and the testing folders.
+    """
+    def split_names(raw):
+        # os.getenv returns None for an unset variable; treat it as empty.
+        stripped = (name.strip() for name in (raw or "").split(','))
+        return [name for name in stripped if name]
+
+    return split_names(SEEDS_NAME), split_names(TESTING_FOLDERS)
+
+
+def app_initialisation():
+    """
+    Collect everything the menu needs to start.
+
+    The model names are read from the backend ``/model-endpoints-metadata``
+    endpoint, the seed names and testing folders from the environment, and
+    the blob client is created from the storage connection string.
+
+    Returns:
+        tuple: The seed names, the testing folders, the model names, the blob
+        service client and the backend URL, in that order.
+    """
+    metadata = requests.get(NACHET_BACKEND_URL + "/model-endpoints-metadata").json()
+    models = [entry.get("model_name") for entry in metadata]
+    seeds_name, testing_folders = format_list_env()
+    bsc = get_blob_client(AZURE_STORAGE_CONNECTION_STRING)
+    return (seeds_name, testing_folders, models, bsc, NACHET_BACKEND_URL)
+
+
+def main():
+    """Open the menu (entry 5 of ``actions``) with the initial application state."""
+    open_menu = actions[5]
+    initial_state = app_initialisation()
+    open_menu(*initial_state)
+
+# Only start the app when this file is run as a script.
+if __name__ == "__main__":
+    main()
diff --git a/nachet/nachet_ui.py b/nachet/nachet_ui.py
@@ -0,0 +1,117 @@
+import time
+import sys
+import openpyxl
+import json
+
+from datastore import get_testing_image
+from inference_testing import start_testing
+
+# Module-level state shared by the menu and the test actions.
+# "blob_image" maps a seed name to the images already downloaded for it;
+# menu() adds the "seeds_name", "testing_folders", "MODELS",
+# "DATASTORE_CLIENT" and "NACHET_BACKEND_URL" entries when called with
+# arguments.
+CACHE = {
+    "blob_image": {}
+}
+
+
+def test_image(seed: str):
+    """
+    Run the models against the testing images of ``seed``.
+
+    The images are downloaded from the first testing folder unless they are
+    already in CACHE. The user is asked how many images to test, and the
+    results are written to ``results_{seed}.txt`` and to a workbook.
+
+    Args:
+        seed (str): The name of the seed to test.
+    """
+    clear()
+    print("Start loading images")
+    load_start = time.perf_counter()
+
+    images_by_seed = CACHE.get("blob_image")
+    if not images_by_seed.get(seed):
+        # Nothing cached for this seed yet: download its images.
+        images_by_seed[seed] = get_testing_image(
+            CACHE["testing_folders"][0],
+            CACHE["DATASTORE_CLIENT"],
+            seed,
+        )
+    images = images_by_seed[seed]
+
+    print(f"Finish loading {len(images)} images")
+    print(f"Took: {time.perf_counter() - load_start:10.4f} seconds")
+
+    nb_image = int(input(
+        "\nEnter the number of image you want to test the models against:\n"
+    ))
+
+    input("Enter any key to start testing")
+
+    clear()
+    print("Start testing images")
+    test_start = time.perf_counter()
+    results = start_testing(
+        nb_image,
+        images,
+        CACHE["NACHET_BACKEND_URL"],
+        CACHE["MODELS"],
+    )
+    print(f"Took: {time.perf_counter() - test_start:10.4f} seconds")
+
+    with open(f"results_{seed}.txt", "w+") as output:
+        output.write(json.dumps(results, indent=4))
+
+    save_to_workbook(results, seed)
+
+    print("Results saved to workbook")
+
+
+def user_image(seed: str):
+    """Placeholder action: report that it is not implemented and reopen the menu."""
+    clear()
+    message = f"not implement yet {seed}"
+    print(message)
+    menu()
+
+def folder_specific_image(seed: str):
+    """Placeholder action: report that it is not implemented and reopen the menu."""
+    clear()
+    message = "not implement yet"
+    print(message)
+    menu()
+
+def save_to_workbook(results: dict, seed: str):
+    """
+    Write the testing results to ``results_{seed}.xlsx``.
+
+    For every model the sheet receives the model name, a header row and, for
+    each image, one row with the image key and request time followed by one
+    row per detected seed with its label and topN labels. Every value is
+    written under its header column.
+
+    Args:
+        results (dict): For each model, a dictionary keyed by image holding
+        the "labels", "topN" and "request_time" entries.
+        seed (str): The seed name used to build the file name.
+    """
+    wb = openpyxl.Workbook()
+    ws = wb.active
+
+    for model, result in results.items():
+        ws.append([model])
+        ws.append(["Image", "Labels", "TopN", "time"])
+
+        for key, value in result.items():
+            labels = value["labels"]
+            top_n = value["topN"]
+
+            # "topN" is a flat list, filled seed after seed. It can be empty
+            # (no topN returned) or hold several entries per seed, so indexing
+            # it with the seed position is unsafe. Give each seed its own
+            # slice when the list divides evenly between the seeds; otherwise
+            # the entries cannot be attributed and the column is left blank.
+            per_seed = len(top_n) // len(labels) if labels else 0
+            if per_seed * len(labels) != len(top_n):
+                per_seed = 0
+
+            ws.append([key, None, None, value["request_time"]])
+            # Iterate the labels themselves rather than "nb_seeds", which
+            # may be missing or differ from the number of labels.
+            for position, label in enumerate(labels):
+                seed_top_n = top_n[position * per_seed:(position + 1) * per_seed]
+                cell = ", ".join(str(entry) for entry in seed_top_n) or None
+                ws.append([None, label, cell])
+
+    wb.save(f"results_{seed}.xlsx")
+
+    print(f"Results saved in results_{seed}.xlsx")
+
+def clear():
+    """Clear the terminal by writing the ANSI home and erase sequences to stdout."""
+    cursor_home, erase_display = "\033[H", "\033[J"
+    sys.stdout.write(cursor_home + erase_display)
+
+def _ask_choice(prompt: str, valid) -> int:
+    """Prompt until the user enters an integer contained in ``valid``."""
+    while True:
+        answer = input(prompt)
+        try:
+            choice = int(answer)
+        except ValueError:
+            choice = None
+        if choice in valid:
+            return choice
+        print(f"Invalid selection: {answer!r}")
+
+
+def menu(*args):
+    """
+    Show the menu, then run the selected action on the selected seed.
+
+    Args:
+        *args: Either nothing, to reuse the state already stored in CACHE, or
+        the seed names, the testing folders, the model names, the datastore
+        client and the backend URL, in that order.
+
+    Raises:
+        ValueError: If no seed name is available to choose from.
+    """
+    if args:
+        CACHE["seeds_name"] = args[0]
+        CACHE["testing_folders"] = args[1]
+        CACHE["MODELS"] = args[2]
+        CACHE["DATASTORE_CLIENT"] = args[3]
+        CACHE["NACHET_BACKEND_URL"] = args[4]
+
+    seeds = CACHE["seeds_name"]
+    if not seeds:
+        raise ValueError("no seed name is configured")
+
+    print("Welcome to nachet testing app!")
+    for number, seed in enumerate(seeds, start=1):
+        print(f"{number}. {seed}")
+
+    # Only the listed numbers are accepted: 0 would otherwise wrap around to
+    # the last seed and larger numbers would raise IndexError.
+    selection = _ask_choice(
+        "Select the seed you want to test:", range(1, len(seeds) + 1))
+    seed_to_test = seeds[selection - 1]
+
+    clear()
+
+    # Entry 5 of ``actions`` is this menu and expects the start-up state, not
+    # a seed name, so it is not offered here.
+    selection = _ask_choice("""
+To test with test picture enter 1.
+To test with user picture enter 2.
+To test with a pecific folder enter 3.
+To exit quit the app enter 4.
+
+Please enter your selection:
+    """, range(1, 5))
+
+    if selection == 4:
+        # sys.exit(seed_name) would print the seed name and exit with
+        # status 1; leave with a success status instead.
+        sys.exit()
+
+    actions[selection](seed_to_test)
+
+# Dispatch table mapping a selection number to the function handling it.
+# Entries 1 to 4 are the choices listed in the menu() prompt; entry 5 is the
+# menu itself, which main() in nachet_test.py calls to start the app.
+actions = {
+    1: test_image,
+    2: user_image,
+    3: folder_specific_image,
+    4: sys.exit,
+    5: menu,
+}
diff --git a/nachet/todo.py b/nachet/todo.py
@@ -0,0 +1,10 @@
+# TODO Create a connection to Azure storage that can eventually be switched to a
+# datastore connection
+
+# TODO Retrieve a user-specified number of testing images
+
+# TODO Retrieve a user-specified number of user images
+
+# TODO Have a data structure for both image types
+
+# TODO Display a message to the user when the images have finished loading.
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,8 @@
 locust
 python-dotenv
 azure-cognitiveservices-search-websearch
+azure-storage-blob
+azure-identity
 msrest
 openpyxl
 natsort