From a9cb91ddd7b868101676dc2f3d0d99a672a93750 Mon Sep 17 00:00:00 2001 From: Maxence Guindon Date: Tue, 16 Apr 2024 13:07:10 +0000 Subject: [PATCH 1/5] Fixes #18: initial commit --- nachet/datastore.py | 35 +++++++++++++++++++++++++++++++++++ nachet/todo.py | 16 ++++++++++++++++ requirements.txt | 2 ++ 3 files changed, 53 insertions(+) create mode 100644 nachet/datastore.py create mode 100644 nachet/todo.py diff --git a/nachet/datastore.py b/nachet/datastore.py new file mode 100644 index 0000000..f442894 --- /dev/null +++ b/nachet/datastore.py @@ -0,0 +1,35 @@ +from azure.storage.blob import BlobServiceClient, ContainerClient +from azure.core.exceptions import ResourceNotFoundError + +class DatastoreErrors(Exception): + pass + +def get_blob_client(connection: str) -> BlobServiceClient: + """ + Returns a BlobServiceClient object initialized with the provided connection + string. + + Args: + connection (str): The connection string for the Azure Blob Storage + account. + + Returns: + BlobServiceClient: The initialized BlobServiceClient object. + + Raises: + DatastoreErrors: If the blob client cannot be retrieved. + """ + try: + return BlobServiceClient.from_connection_string(connection) + except ValueError as error: + raise DatastoreErrors("could not retrieve the blob client") from error + + +def get_testing_image(amount: int, blob_path: str, + blob_service_client: BlobServiceClient) -> list[str]: + pass + + +def get_user_image(amount: int, blob_path: str | list, + blob_service_client: BlobServiceClient) -> list[str]: + pass diff --git a/nachet/todo.py b/nachet/todo.py new file mode 100644 index 0000000..58f08e4 --- /dev/null +++ b/nachet/todo.py @@ -0,0 +1,16 @@ +# TODO Createa connexion to azure storage that can eventually be switch to a +# datastore connection + +# TODO Retrieve a user specified amount of testing image + +# TODO Retrieve a user specified amount of user image + +# TODO Have a datastructure for both image type + +# TODO have a user message displaying the end of the loading image. + +import os + +# Environment variable +AZURE_STORAGE_CONNECTION_STRING = os.env("AZURE_STORAGE_CONNECTION_STRING") +SEEDS_NAME = os.env("SEEDS_NAME") diff --git a/requirements.txt b/requirements.txt index 92c606b..d31c762 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ locust python-dotenv azure-cognitiveservices-search-websearch +azure-storage-blob +azure-identity msrest openpyxl natsort From cbb5eacf95e0ce2720950a41af8fe0b3e61d3868 Mon Sep 17 00:00:00 2001 From: Maxence Guindon Date: Wed, 17 Apr 2024 12:46:07 +0000 Subject: [PATCH 2/5] fixes #18: Add function to get test image --- nachet/datastore.py | 72 +++++++++++++++++++++++++++++++++++++++++++-- nachet/todo.py | 6 ---- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/nachet/datastore.py b/nachet/datastore.py index f442894..09a4fa3 100644 --- a/nachet/datastore.py +++ b/nachet/datastore.py @@ -1,9 +1,35 @@ +import os +import time +import re + +from concurrent.futures import ThreadPoolExecutor + +from dotenv import load_dotenv from azure.storage.blob import BlobServiceClient, ContainerClient from azure.core.exceptions import ResourceNotFoundError + +load_dotenv() + +# Environment variable +AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") +SEEDS_NAME = os.getenv("SEEDS_NAME") +TESTING_FOLDERS = os.getenv("TESTING_FOLDERS") + + class DatastoreErrors(Exception): pass +def format_list_env(): + """ + Format the list of environment variable for the seeds name and testing + folders. + """ + seeds_name = [name.strip() for name in SEEDS_NAME.split(',')] + testing_folders = [name.strip() for name in TESTING_FOLDERS.split(',')] + return seeds_name, testing_folders + + def get_blob_client(connection: str) -> BlobServiceClient: """ Returns a BlobServiceClient object initialized with the provided connection @@ -25,11 +51,51 @@ def get_blob_client(connection: str) -> BlobServiceClient: raise DatastoreErrors("could not retrieve the blob client") from error + def get_testing_image(amount: int, blob_path: str, - blob_service_client: BlobServiceClient) -> list[str]: - pass + blob_service_client: BlobServiceClient, + seed_name: list[str], key_word: str = "testing") -> list[str]: + """ + 2024-taran-verified-seedid + """ + + def get_blob_urls(container: ContainerClient) -> list[str]: + """ + """ + return [ + container.get_blob_client(name).url + for name in container.list_blob_names() + if key_word in name + ] + + container_list = blob_service_client.list_containers(name_starts_with=blob_path) + containers = [blob_service_client.get_container_client(c.name) for c in container_list] + + with ThreadPoolExecutor() as executor: + img_url = sum(executor.map(get_blob_urls, containers), []) + + seed_testing = { + seed: [url for url in img_url if seed.split(" ")[1] in url] + for seed in seed_name + } + # Divide the amount per seed to select a number of image to test the models with + nb_image_per_seed = round(amount / len(seed_name)) -def get_user_image(amount: int, blob_path: str | list, + print(seed_testing[seed_name[0]]) + + print(seed_name[0]) + + return seed_testing + + +def get_user_image(amount: int, blob_path: list[str], blob_service_client: BlobServiceClient) -> list[str]: pass + +if __name__ == "__main__": + seconds = time.perf_counter() + seeds_name, testing_folders = format_list_env() + bsc = get_blob_client(AZURE_STORAGE_CONNECTION_STRING) + get_testing_image(55, testing_folders[1], bsc, seeds_name) + print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") diff --git a/nachet/todo.py b/nachet/todo.py index 58f08e4..095cdf2 100644 --- a/nachet/todo.py +++ b/nachet/todo.py @@ -8,9 +8,3 @@ # TODO Have a datastructure for both image type # TODO have a user message displaying the end of the loading image. - -import os - -# Environment variable -AZURE_STORAGE_CONNECTION_STRING = os.env("AZURE_STORAGE_CONNECTION_STRING") -SEEDS_NAME = os.env("SEEDS_NAME") From 039b49e7ae007e5adf5fe0e0d9ca264d03fef5b3 Mon Sep 17 00:00:00 2001 From: Maxence Guindon Date: Wed, 17 Apr 2024 20:45:47 +0000 Subject: [PATCH 3/5] fixes #18: Start Nachet test ui --- nachet/datastore.py | 82 +++++++++++++----------------------- nachet/inference_testing.py | 45 ++++++++++++++++++++ nachet/nachet_test.py | 41 ++++++++++++++++++ nachet/nachet_ui.py | 84 +++++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 52 deletions(-) create mode 100644 nachet/inference_testing.py create mode 100644 nachet/nachet_test.py create mode 100644 nachet/nachet_ui.py diff --git a/nachet/datastore.py b/nachet/datastore.py index 09a4fa3..ead1805 100644 --- a/nachet/datastore.py +++ b/nachet/datastore.py @@ -1,34 +1,11 @@ -import os -import time -import re - from concurrent.futures import ThreadPoolExecutor -from dotenv import load_dotenv from azure.storage.blob import BlobServiceClient, ContainerClient -from azure.core.exceptions import ResourceNotFoundError - - -load_dotenv() - -# Environment variable -AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") -SEEDS_NAME = os.getenv("SEEDS_NAME") -TESTING_FOLDERS = os.getenv("TESTING_FOLDERS") class DatastoreErrors(Exception): pass -def format_list_env(): - """ - Format the list of environment variable for the seeds name and testing - folders. - """ - seeds_name = [name.strip() for name in SEEDS_NAME.split(',')] - testing_folders = [name.strip() for name in TESTING_FOLDERS.split(',')] - return seeds_name, testing_folders - def get_blob_client(connection: str) -> BlobServiceClient: """ @@ -51,51 +28,52 @@ def get_blob_client(connection: str) -> BlobServiceClient: raise DatastoreErrors("could not retrieve the blob client") from error - -def get_testing_image(amount: int, blob_path: str, +def get_testing_image(blob_path: str, blob_service_client: BlobServiceClient, - seed_name: list[str], key_word: str = "testing") -> list[str]: + seed_name: str, key_word: str = "testing") -> list[str]: """ - 2024-taran-verified-seedid + Get the blobs of testing images from Azure Blob Storage. + + Args: + blob_path (str): The path to the blob containers. + blob_service_client (BlobServiceClient): The BlobServiceClient object. + seed_name (list[str]): A list of seed names. + key_word (str, optional): The keyword to filter the blob names. Defaults to "testing". + + Returns: + list[str]: A dictionary containing the seed names as keys and the corresponding image blobs as values. """ def get_blob_urls(container: ContainerClient) -> list[str]: """ + Get the blobs in a container. + + Args: + container (ContainerClient): The ContainerClient object. + + Returns: + list: A list of blob. """ + return [ - container.get_blob_client(name).url - for name in container.list_blob_names() - if key_word in name + container.get_blob_client(blob.name).download_blob().readall() + for blob in container.list_blobs() + if seed_name in blob.name and key_word in blob.name ] container_list = blob_service_client.list_containers(name_starts_with=blob_path) containers = [blob_service_client.get_container_client(c.name) for c in container_list] with ThreadPoolExecutor() as executor: - img_url = sum(executor.map(get_blob_urls, containers), []) - - seed_testing = { - seed: [url for url in img_url if seed.split(" ")[1] in url] - for seed in seed_name - } + images = sum(executor.map(get_blob_urls, containers), []) - # Divide the amount per seed to select a number of image to test the models with - nb_image_per_seed = round(amount / len(seed_name)) + return images - print(seed_testing[seed_name[0]]) - print(seed_name[0]) - - return seed_testing - - -def get_user_image(amount: int, blob_path: list[str], - blob_service_client: BlobServiceClient) -> list[str]: +def get_user_image(blob_path: list[str], + blob_service_client: BlobServiceClient, + seed_name: list[str], key_word: str = "user") -> list[str]: pass -if __name__ == "__main__": - seconds = time.perf_counter() - seeds_name, testing_folders = format_list_env() - bsc = get_blob_client(AZURE_STORAGE_CONNECTION_STRING) - get_testing_image(55, testing_folders[1], bsc, seeds_name) - print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") +def get_image_from_folder(blob_path: str) -> list[str]: + pass diff --git a/nachet/inference_testing.py b/nachet/inference_testing.py new file mode 100644 index 0000000..3dd3c78 --- /dev/null +++ b/nachet/inference_testing.py @@ -0,0 +1,45 @@ +import openpyxl +import base64 +import requests +import json + + +def start_testing(amount: int, data: list, backend_url: str, models: list[str]) -> dict: + """ + Start the testing process. + + Args: + amount (int): The number of tests to perform. + data (list): A list containing the seeds name and testing folders. + backend_url (str): The URL of the backend. + bsc: The BSC object. + + Returns: + dict: A dictionary containing the results of the testing process. + """ + + images_to_test = [ base64.b64encode(blob).decode("utf8") for blob in data[:amount]] + + for img in images_to_test: + for model in models: + paylaoad = { + "model_name":model, + "validator": "nachet_testing_image", + "folder_name": "api_test_nachet", + "container_name": "testing-images", + "imageDims": [100, 100], + "image": img + } + + headers={ + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }, + + response = requests.post(backend_url + "/inf", json=paylaoad, headers=headers) + result = response.json() + + print() + +def test_inference(image: str, backend_url: str) -> dict: + pass diff --git a/nachet/nachet_test.py b/nachet/nachet_test.py new file mode 100644 index 0000000..af1158c --- /dev/null +++ b/nachet/nachet_test.py @@ -0,0 +1,41 @@ + +import os +import requests + +from dotenv import load_dotenv + +from datastore import get_blob_client +from nachet_ui import actions + +load_dotenv() + +# Environment variable +AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") +SEEDS_NAME = os.getenv("SEEDS_NAME") +TESTING_FOLDERS = os.getenv("TESTING_FOLDERS") +NACHET_BACKEND_URL = os.getenv("NACHET_BACKEND_URL") + + +def format_list_env(): + """ + Format the list of environment variable for the seeds name and testing + folders. + """ + seeds_name = [name.strip() for name in SEEDS_NAME.split(',')] + testing_folders = [name.strip() for name in TESTING_FOLDERS.split(',')] + return seeds_name, testing_folders + + +def app_initialisation(): + url = NACHET_BACKEND_URL + "/model-endpoints-metadata" + response = requests.get(url) + seeds_name, testing_folders = format_list_env() + bsc = get_blob_client(AZURE_STORAGE_CONNECTION_STRING) + return seeds_name, testing_folders, bsc, NACHET_BACKEND_URL + + +def main(): + actions[5](*app_initialisation()) + +if __name__ == "__main__": + main() diff --git a/nachet/nachet_ui.py b/nachet/nachet_ui.py new file mode 100644 index 0000000..7fa92d7 --- /dev/null +++ b/nachet/nachet_ui.py @@ -0,0 +1,84 @@ +import time +import sys + +from datastore import get_testing_image +from inference_testing import start_testing + +CACHE = {} + + +def test_image(seed: str): + clear() + print("Start loading images") + if not CACHE.get("blob_image"): + seconds = time.perf_counter() + CACHE["blob_image"] = get_testing_image( + CACHE["testing_folders"][0], + CACHE["DATASTORE_CLIENT"], + seed, + ) + print("Finish Loading image") + print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") + + amount = input( +""" +Enter the number of image you want to test the models against: +""" + ) + nb_image = int(amount) + + _ = input("Enter any key to start testing") + + start_testing(nb_image, CACHE["blob_image"], CACHE["NACHET_BACKEND_URL"], CACHE["DATASTORE_CLIENT"]) + + #actions[4](0) + + +def user_image(seed: str): + clear() + print(f"not implement yet {seed}") + menu() + +def folder_specific_image(seed: str): + clear() + print("not implement yet") + menu() + +def clear(): + sys.stdout.write("\033[H\033[J") + +def menu(*args): + if args: + CACHE["seeds_name"] = args[0] + CACHE["testing_folders"] = args[1] + CACHE["DATASTORE_CLIENT"] = args[2] + CACHE["NACHET_BACKEND_URL"] = args[3] + + print("Welcome to nachet testing app!") + for i, seed in enumerate(CACHE["seeds_name"]): + print(f"{i+1}. {seed}") + + selection = input("Select the seed you want to test:") + + seed_to_test = int(selection)-1 + + clear() + + selection = input(""" +To test with test picture enter 1. +To test with user picture enter 2. +To test with a pecific folder enter 3. +To exit quit the app enter 4. + +Please enter your selection: + """ + ) + actions[int(selection)](CACHE["seeds_name"][seed_to_test]) + +actions = { + 1: test_image, + 2: user_image, + 3: folder_specific_image, + 4: sys.exit, + 5: menu, +} From 420e62d335a9236094712211d6e27e9ceb9a110e Mon Sep 17 00:00:00 2001 From: MaxenceGui Date: Wed, 17 Apr 2024 18:00:41 -0400 Subject: [PATCH 4/5] fixes #18: Connect the test image the inference request --- .env.template | 5 +++++ nachet/inference_testing.py | 17 ++++++++--------- nachet/nachet_test.py | 5 +++-- nachet/nachet_ui.py | 21 +++++++++++++-------- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/.env.template b/.env.template index ec6cf6d..8addea0 100644 --- a/.env.template +++ b/.env.template @@ -2,3 +2,8 @@ BING_SEARCH_KEY = BING_ENDPOINT = CACHE_PATH = OUTPUT_FOLDER = + +AZURE_STORAGE_CONNECTION_STRING= +SEEDS_NAME= +TESTING_FOLDERS= +NACHET_BACKEND_URL= diff --git a/nachet/inference_testing.py b/nachet/inference_testing.py index 3dd3c78..2c32549 100644 --- a/nachet/inference_testing.py +++ b/nachet/inference_testing.py @@ -1,7 +1,6 @@ import openpyxl import base64 import requests -import json def start_testing(amount: int, data: list, backend_url: str, models: list[str]) -> dict: @@ -18,28 +17,28 @@ def start_testing(amount: int, data: list, backend_url: str, models: list[str]) dict: A dictionary containing the results of the testing process. """ - images_to_test = [ base64.b64encode(blob).decode("utf8") for blob in data[:amount]] + images_to_test = [base64.b64encode(blob).decode("utf8") for blob in data[:amount]] for img in images_to_test: for model in models: - paylaoad = { - "model_name":model, + payload = { + "model_name": model, "validator": "nachet_testing_image", "folder_name": "api_test_nachet", "container_name": "testing-images", "imageDims": [100, 100], - "image": img + "image": "data:image/PNG;base64," + img } - headers={ + headers = { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*", - }, + } - response = requests.post(backend_url + "/inf", json=paylaoad, headers=headers) + response = requests.post(backend_url + "/inf", json=payload, headers=headers).json() result = response.json() - print() + print("ici") def test_inference(image: str, backend_url: str) -> dict: pass diff --git a/nachet/nachet_test.py b/nachet/nachet_test.py index af1158c..c3d8d4a 100644 --- a/nachet/nachet_test.py +++ b/nachet/nachet_test.py @@ -28,10 +28,11 @@ def format_list_env(): def app_initialisation(): url = NACHET_BACKEND_URL + "/model-endpoints-metadata" - response = requests.get(url) + response = requests.get(url).json() + models = [m.get("model_name") for m in response] seeds_name, testing_folders = format_list_env() bsc = get_blob_client(AZURE_STORAGE_CONNECTION_STRING) - return seeds_name, testing_folders, bsc, NACHET_BACKEND_URL + return seeds_name, testing_folders, models, bsc, NACHET_BACKEND_URL def main(): diff --git a/nachet/nachet_ui.py b/nachet/nachet_ui.py index 7fa92d7..4f5c183 100644 --- a/nachet/nachet_ui.py +++ b/nachet/nachet_ui.py @@ -4,20 +4,24 @@ from datastore import get_testing_image from inference_testing import start_testing -CACHE = {} +CACHE = { + "blob_image": {} +} def test_image(seed: str): clear() print("Start loading images") - if not CACHE.get("blob_image"): - seconds = time.perf_counter() - CACHE["blob_image"] = get_testing_image( + seconds = time.perf_counter() + + if not CACHE.get("blob_image").get(seed): + CACHE["blob_image"][seed] = get_testing_image( CACHE["testing_folders"][0], CACHE["DATASTORE_CLIENT"], seed, ) - print("Finish Loading image") + + print(f"Finish loading {len(CACHE['blob_image'][seed])} images") print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") amount = input( @@ -29,7 +33,7 @@ def test_image(seed: str): _ = input("Enter any key to start testing") - start_testing(nb_image, CACHE["blob_image"], CACHE["NACHET_BACKEND_URL"], CACHE["DATASTORE_CLIENT"]) + start_testing(nb_image, CACHE["blob_image"][seed], CACHE["NACHET_BACKEND_URL"], CACHE["MODELS"]) #actions[4](0) @@ -51,8 +55,9 @@ def menu(*args): if args: CACHE["seeds_name"] = args[0] CACHE["testing_folders"] = args[1] - CACHE["DATASTORE_CLIENT"] = args[2] - CACHE["NACHET_BACKEND_URL"] = args[3] + CACHE["MODELS"] = args[2] + CACHE["DATASTORE_CLIENT"] = args[3] + CACHE["NACHET_BACKEND_URL"] = args[4] print("Welcome to nachet testing app!") for i, seed in enumerate(CACHE["seeds_name"]): From e5c27e12ba1a954a889351f17868ed369182a7d2 Mon Sep 17 00:00:00 2001 From: MaxenceGui Date: Thu, 18 Apr 2024 15:19:09 -0400 Subject: [PATCH 5/5] fixes #18: Succesfully return inference result --- nachet/inference_testing.py | 36 ++++++++++++++++++++++++++++++++---- nachet/nachet_ui.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/nachet/inference_testing.py b/nachet/inference_testing.py index 2c32549..9d4a588 100644 --- a/nachet/inference_testing.py +++ b/nachet/inference_testing.py @@ -1,6 +1,6 @@ -import openpyxl import base64 import requests +import time def start_testing(amount: int, data: list, backend_url: str, models: list[str]) -> dict: @@ -17,9 +17,15 @@ def start_testing(amount: int, data: list, backend_url: str, models: list[str]) dict: A dictionary containing the results of the testing process. """ + results = {model: {} for model in models} + images_to_test = [base64.b64encode(blob).decode("utf8") for blob in data[:amount]] + i = 1 for img in images_to_test: + + key = f"image{i:02d}" + i += 1 for model in models: payload = { "model_name": model, @@ -35,10 +41,32 @@ def start_testing(amount: int, data: list, backend_url: str, models: list[str]) "Access-Control-Allow-Origin": "*", } - response = requests.post(backend_url + "/inf", json=payload, headers=headers).json() - result = response.json() + start = time.perf_counter() + response = requests.post( + backend_url + "/inf", json=payload, headers=headers).json() + end = time.perf_counter() - start + + boxes = response[0]["boxes"] + + print(key) + print(f"number of seed detecte: {len(boxes)}") + + topN = [] + + if boxes[0].get("topN"): + for box in boxes: + topN.extend([score.get("label") for score in box.get("topN")]) + + results.get(model).update({ + key: { + "labels": [box.get("label") for box in boxes], + "topN": topN, + "nb_seeds": response[0].get("totalBoxes"), + "request_time": end, + } + }) - print("ici") + return results def test_inference(image: str, backend_url: str) -> dict: pass diff --git a/nachet/nachet_ui.py b/nachet/nachet_ui.py index 4f5c183..d513e71 100644 --- a/nachet/nachet_ui.py +++ b/nachet/nachet_ui.py @@ -1,5 +1,7 @@ import time import sys +import openpyxl +import json from datastore import get_testing_image from inference_testing import start_testing @@ -33,9 +35,18 @@ def test_image(seed: str): _ = input("Enter any key to start testing") - start_testing(nb_image, CACHE["blob_image"][seed], CACHE["NACHET_BACKEND_URL"], CACHE["MODELS"]) + clear() + print("Start testing images") + seconds = time.perf_counter() + results = start_testing(nb_image, CACHE["blob_image"][seed], CACHE["NACHET_BACKEND_URL"], CACHE["MODELS"]) + print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") + + with open(f"results_{seed}.txt", "w+") as f: + f.write(json.dumps(results, indent=4)) + + save_to_workbook(results, seed) - #actions[4](0) + print("Results saved to workbook") def user_image(seed: str): @@ -48,6 +59,23 @@ def folder_specific_image(seed: str): print("not implement yet") menu() +def save_to_workbook(results: dict, seed: str): + wb = openpyxl.Workbook() + ws = wb.active + + for model, result in results.items(): + ws.append([model]) + ws.append(["Image", "Labels", "TopN", "time"]) + + for key, value in result.items(): + ws.append([key, value["request_time"]]) + for i in range(value["nb_seeds"]): + ws.append([value["labels"][i], value["topN"][i]]) + + wb.save(f"results_{seed}.xlsx") + + print(f"Results saved in results_{seed}.xlsx") + def clear(): sys.stdout.write("\033[H\033[J")