Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes 18: Gather and prepare test data for nachet testing #19

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@ BING_SEARCH_KEY =
BING_ENDPOINT =
CACHE_PATH =
OUTPUT_FOLDER =

AZURE_STORAGE_CONNECTION_STRING=
SEEDS_NAME=
TESTING_FOLDERS=
NACHET_BACKEND_URL=
79 changes: 79 additions & 0 deletions nachet/datastore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from concurrent.futures import ThreadPoolExecutor

from azure.storage.blob import BlobServiceClient, ContainerClient


class DatastoreErrors(Exception):
    """Error raised by the datastore helpers, e.g. when no blob client can be built."""


def get_blob_client(connection: str) -> BlobServiceClient:
    """
    Build a BlobServiceClient from an Azure Storage connection string.

    Args:
        connection (str): The connection string for the Azure Blob Storage
            account.

    Returns:
        BlobServiceClient: The initialized BlobServiceClient object.

    Raises:
        DatastoreErrors: If the connection string is missing/empty or the
            blob client cannot be retrieved from it.
    """
    # The connection string comes from os.getenv(), which yields None when the
    # variable is unset. Reject that here so callers only ever have to handle
    # DatastoreErrors, as documented above.
    if not connection:
        raise DatastoreErrors("could not retrieve the blob client")

    try:
        return BlobServiceClient.from_connection_string(connection)
    except ValueError as error:
        raise DatastoreErrors("could not retrieve the blob client") from error


def get_testing_image(blob_path: str,
                      blob_service_client: BlobServiceClient,
                      seed_name: str, key_word: str = "testing") -> list[bytes]:
    """
    Download the testing images of one seed from Azure Blob Storage.

    Every container whose name starts with ``blob_path`` is scanned, and each
    blob whose name contains both ``seed_name`` and ``key_word`` is downloaded.

    Args:
        blob_path (str): Prefix of the names of the containers to scan.
        blob_service_client (BlobServiceClient): The BlobServiceClient object.
        seed_name (str): Seed name that must appear in the blob name.
        key_word (str, optional): Keyword that must also appear in the blob
            name. Defaults to "testing".

    Returns:
        list[bytes]: The content of every matching blob, container by
        container (a flat list, not grouped by seed).
    """

    def download_matching_blobs(container: ContainerClient) -> list[bytes]:
        """
        Download the matching blobs of a single container.

        Args:
            container (ContainerClient): The ContainerClient object.

        Returns:
            list[bytes]: The content of each blob of the container whose name
            contains both ``seed_name`` and ``key_word``.
        """
        return [
            container.get_blob_client(blob.name).download_blob().readall()
            for blob in container.list_blobs()
            if seed_name in blob.name and key_word in blob.name
        ]

    containers = [
        blob_service_client.get_container_client(container.name)
        for container in blob_service_client.list_containers(name_starts_with=blob_path)
    ]

    # One task per container; executor.map yields results in container order.
    with ThreadPoolExecutor() as executor:
        per_container = executor.map(download_matching_blobs, containers)
        # Flatten in a single pass: sum(..., []) would copy the accumulated
        # list again for every container.
        return [image for images in per_container for image in images]


def get_user_image(blob_path: list[str],
                   blob_service_client: BlobServiceClient,
                   seed_name: list[str], key_word: str = "user") -> list[str]:
    """
    Placeholder: not implemented yet.

    The arguments are currently ignored and the function returns None.
    """
    return None

def get_image_from_folder(blob_path: str) -> list[str]:
    """
    Placeholder: not implemented yet.

    The argument is currently ignored and the function returns None.
    """
    return None
72 changes: 72 additions & 0 deletions nachet/inference_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import base64
import requests
import time


def start_testing(amount: int, data: list, backend_url: str, models: list[str]) -> dict:
    """
    Send the first ``amount`` images to every model and collect the results.

    Each image is base64-encoded and POSTed to ``<backend_url>/inf`` once per
    model; the wall-clock time of each request (including JSON decoding) is
    recorded.

    Args:
        amount (int): The number of images to test, taken from the start of
            ``data``.
        data (list): The raw image contents (bytes-like objects).
        backend_url (str): The URL of the backend.
        models (list[str]): The names of the models to query.

    Returns:
        dict: For each model name, a dictionary mapping ``"imageNN"`` keys to
        ``{"labels", "topN", "nb_seeds", "request_time"}``. ``topN`` is the
        flat list of top-N labels of all boxes, in box order.
    """
    endpoint = backend_url + "/inf"
    headers = {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
    }

    results = {model: {} for model in models}

    images_to_test = [base64.b64encode(blob).decode("utf8") for blob in data[:amount]]

    for index, img in enumerate(images_to_test, start=1):
        key = f"image{index:02d}"

        for model in models:
            payload = {
                "model_name": model,
                "validator": "nachet_testing_image",
                "folder_name": "api_test_nachet",
                "container_name": "testing-images",
                "imageDims": [100, 100],
                "image": "data:image/PNG;base64," + img
            }

            start = time.perf_counter()
            response = requests.post(endpoint, json=payload, headers=headers).json()
            elapsed = time.perf_counter() - start

            inference = response[0]
            boxes = inference["boxes"]

            print(key)
            print(f"number of seed detecte: {len(boxes)}")

            # An image with no detected seed gives an empty ``boxes`` list and
            # some models return boxes without "topN"; both cases simply
            # contribute nothing instead of raising.
            topN = [
                score.get("label")
                for box in boxes
                for score in (box.get("topN") or [])
            ]

            results[model][key] = {
                "labels": [box.get("label") for box in boxes],
                "topN": topN,
                "nb_seeds": inference.get("totalBoxes"),
                "request_time": elapsed,
            }

    return results

def test_inference(image: str, backend_url: str) -> dict:
    """
    Placeholder: not implemented yet.

    The arguments are currently ignored and the function returns None.
    """
    return None
42 changes: 42 additions & 0 deletions nachet/nachet_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

import os
import requests

from dotenv import load_dotenv

from datastore import get_blob_client
from nachet_ui import actions

# Populate the process environment (python-dotenv) before reading the settings.
load_dotenv()

# Settings read from the environment; each one is None when the variable is
# not set.
AZURE_STORAGE_CONNECTION_STRING = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
SEEDS_NAME = os.environ.get("SEEDS_NAME")
TESTING_FOLDERS = os.environ.get("TESTING_FOLDERS")
NACHET_BACKEND_URL = os.environ.get("NACHET_BACKEND_URL")


def format_list_env():
    """
    Parse the comma-separated SEEDS_NAME and TESTING_FOLDERS settings.

    Returns:
        tuple[list[str], list[str]]: The seed names and the testing folders,
        each entry stripped of surrounding whitespace. Blank entries are
        dropped, and an unset or empty setting yields an empty list.
    """

    def split_names(raw):
        # os.getenv() gives None for an unset variable and "" for a variable
        # left blank; "".split(",") would otherwise produce one empty name.
        return [name.strip() for name in (raw or "").split(",") if name.strip()]

    return split_names(SEEDS_NAME), split_names(TESTING_FOLDERS)


def app_initialisation():
    """
    Collect everything the menu needs at start-up.

    Fetches ``<NACHET_BACKEND_URL>/model-endpoints-metadata`` to get the model
    names, parses the seed/folder settings and creates the blob client.

    Returns:
        tuple: ``(seeds_name, testing_folders, models, bsc, NACHET_BACKEND_URL)``.
    """
    metadata_url = NACHET_BACKEND_URL + "/model-endpoints-metadata"
    endpoints = requests.get(metadata_url).json()

    models = []
    for endpoint in endpoints:
        models.append(endpoint.get("model_name"))

    seeds_name, testing_folders = format_list_env()
    blob_client = get_blob_client(AZURE_STORAGE_CONNECTION_STRING)

    return (
        seeds_name,
        testing_folders,
        models,
        blob_client,
        NACHET_BACKEND_URL,
    )


def main():
    """Initialise the application and pass the start-up values to action 5."""
    launch = actions[5]
    launch(*app_initialisation())


if __name__ == "__main__":
    main()
117 changes: 117 additions & 0 deletions nachet/nachet_ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import time
import sys
import openpyxl
import json

from datastore import get_testing_image
from inference_testing import start_testing

# Module-wide state shared by the menu functions. "blob_image" maps a seed
# name to the images already downloaded for it; menu() stores further keys.
CACHE = dict(blob_image={})


def test_image(seed: str):
    """
    Test the models against the testing images of ``seed`` and save the results.

    The images are downloaded once per seed and kept in ``CACHE["blob_image"]``.
    The user is asked how many images to send, the results are written to
    ``results_<seed>.txt`` (JSON) and to a workbook via ``save_to_workbook``.

    Args:
        seed (str): The seed name whose testing images are used.
    """
    clear()
    print("Start loading images")
    started = time.perf_counter()

    if not CACHE.get("blob_image").get(seed):
        CACHE["blob_image"][seed] = get_testing_image(
            CACHE["testing_folders"][0],
            CACHE["DATASTORE_CLIENT"],
            seed,
        )
    images = CACHE["blob_image"][seed]

    print(f"Finish loading {len(images)} images")
    print(f"Took: {time.perf_counter() - started:10.4f} seconds")

    # Keep asking until a positive whole number is given: a non-numeric reply
    # would crash int(), and a negative one would silently slice the image
    # list from the end.
    prompt = "\nEnter the number of image you want to test the models against:\n"
    while True:
        amount = input(prompt)
        try:
            nb_image = int(amount)
        except ValueError:
            nb_image = 0
        if nb_image > 0:
            break
        print("Please enter a positive whole number.")

    _ = input("Enter any key to start testing")

    clear()
    print("Start testing images")
    started = time.perf_counter()
    results = start_testing(nb_image, images, CACHE["NACHET_BACKEND_URL"], CACHE["MODELS"])
    print(f"Took: {time.perf_counter() - started:10.4f} seconds")

    with open(f"results_{seed}.txt", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)

    save_to_workbook(results, seed)

    print("Results saved to workbook")


def user_image(seed: str):
    """Placeholder action: report that it is not implemented, then reopen the menu."""
    clear()
    notice = f"not implement yet {seed}"
    print(notice)
    menu()

def folder_specific_image(seed: str):
    """Placeholder action: report that it is not implemented, then reopen the menu.

    ``seed`` is accepted for consistency with the other actions but is unused.
    """
    clear()
    notice = "not implement yet"
    print(notice)
    menu()

def save_to_workbook(results: dict, seed: str):
    """
    Write the testing results to ``results_<seed>.xlsx``.

    For each model the sheet gets the model name, a header row
    (Image / Labels / TopN / time), then per image one row holding the image
    key and the request time followed by one row per detected seed holding its
    label and its top-N labels.

    Args:
        results (dict): Results keyed by model name, then by image key; each
            image entry holds "labels", "topN" and "request_time".
        seed (str): The seed name, used to build the file name.
    """
    wb = openpyxl.Workbook()
    ws = wb.active

    for model, result in results.items():
        ws.append([model])
        ws.append(["Image", "Labels", "TopN", "time"])

        for key, value in result.items():
            labels = value.get("labels") or []
            top_n = value.get("topN") or []

            # Image row: only the "Image" and "time" columns are filled so the
            # values line up with the header.
            ws.append([key, None, None, value["request_time"]])

            # "topN" is one flat list covering every box in order, and it is
            # empty for models that return no top-N. It is split back into
            # per-box groups only when it divides evenly among the boxes;
            # otherwise the TopN cell is left blank rather than guessed.
            per_box = len(top_n) // len(labels) if labels else 0
            evenly_split = per_box > 0 and per_box * len(labels) == len(top_n)

            for position, label in enumerate(labels):
                if evenly_split:
                    group = top_n[position * per_box:(position + 1) * per_box]
                    candidates = ", ".join(str(item) for item in group)
                else:
                    candidates = None
                ws.append([None, label, candidates])

    wb.save(f"results_{seed}.xlsx")

    print(f"Results saved in results_{seed}.xlsx")

def clear():
    """Clear the terminal by writing ANSI escape sequences to standard output."""
    cursor_home = "\033[H"
    erase_to_end = "\033[J"
    sys.stdout.write(cursor_home + erase_to_end)

def _ask_for_number(prompt: str, allowed) -> int:
    """Ask with ``prompt`` until the reply is an integer contained in ``allowed``."""
    while True:
        reply = input(prompt)
        try:
            number = int(reply)
        except ValueError:
            number = None
        if number in allowed:
            return number
        print("Invalid selection, please try again.")


def menu(*args):
    """
    Show the interactive menu and run the selected action on the chosen seed.

    Args:
        *args: Either nothing (the values already stored in CACHE are reused)
            or the start-up values, in order: seeds name, testing folders,
            models, datastore client, backend URL.
    """
    if args:
        CACHE["seeds_name"] = args[0]
        CACHE["testing_folders"] = args[1]
        CACHE["MODELS"] = args[2]
        CACHE["DATASTORE_CLIENT"] = args[3]
        CACHE["NACHET_BACKEND_URL"] = args[4]

    seeds = CACHE["seeds_name"]

    print("Welcome to nachet testing app!")
    if not seeds:
        # Nothing can be selected, so asking would loop forever.
        print("No seed name is configured.")
        return

    for number, seed in enumerate(seeds, start=1):
        print(f"{number}. {seed}")

    seed_number = _ask_for_number(
        "Select the seed you want to test:", range(1, len(seeds) + 1)
    )

    clear()

    # Only 1-4 are offered to the user: entry 5 of ``actions`` is menu itself,
    # and calling it with a seed name would overwrite CACHE["seeds_name"].
    choice = _ask_for_number(
        "\n"
        "To test with test picture enter 1.\n"
        "To test with user picture enter 2.\n"
        "To test with a pecific folder enter 3.\n"
        "To exit quit the app enter 4.\n"
        "\n"
        "Please enter your selection:\n",
        range(1, 5),
    )

    if choice == 4:
        # sys.exit() treats a string argument as an error message (printed to
        # stderr, exit status 1), so it must not receive the seed name.
        sys.exit()

    actions[choice](seeds[seed_number - 1])

# Dispatch table: selection number -> callable. Numbers 1-4 match the choices
# printed by menu(); 5 is menu itself.
actions = dict(enumerate(
    (
        test_image,             # 1
        user_image,             # 2
        folder_specific_image,  # 3
        sys.exit,               # 4
        menu,                   # 5
    ),
    start=1,
))
10 changes: 10 additions & 0 deletions nachet/todo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# TODO Create a connection to Azure storage that can eventually be switched to a
# datastore connection

# TODO Retrieve a user-specified number of testing images

# TODO Retrieve a user-specified number of user images

# TODO Have a data structure for both image types

# TODO Display a message to the user when the images have finished loading.
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
locust
python-dotenv
azure-cognitiveservices-search-websearch
azure-storage-blob
azure-identity
msrest
openpyxl
natsort
Expand Down
Loading