From fc366a1713a5b11373168634a6e0fb11d5b2162e Mon Sep 17 00:00:00 2001 From: prabhuomkar Date: Sat, 22 Jun 2024 10:37:06 +0530 Subject: [PATCH 1/3] Updated API tests --- api/internal/handlers/mediaitems_test.go | 30 ++++++++++++++++++++++++ api/pkg/storage/storage.go | 7 +++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/api/internal/handlers/mediaitems_test.go b/api/internal/handlers/mediaitems_test.go index 224e974..2456bea 100644 --- a/api/internal/handlers/mediaitems_test.go +++ b/api/internal/handlers/mediaitems_test.go @@ -1192,6 +1192,7 @@ func TestUploadMediaItems(t *testing.T) { sampleFile4, contentType4 := getMockedMediaItemFile(t) sampleFile5, contentType5 := getMockedMediaItemFile(t) sampleFile6, contentType6 := getMockedMediaItemFile(t) + sampleFile7, contentType7 := getMockedMediaItemFile(t) tests := []Test{ { "upload mediaitems with invalid command", @@ -1321,6 +1322,35 @@ func TestUploadMediaItems(t *testing.T) { http.StatusInternalServerError, "some db error", }, + { + "upload mediaitems with error saving hash due to duplicates", + http.MethodPost, + "/v1/mediaItems", + "/v1/mediaItems", + []string{}, + []string{}, + map[string]string{ + echo.HeaderContentType: contentType7, + }, + sampleFile7, + func(mock sqlmock.Sqlmock) { + mock.ExpectBegin() + mock.ExpectExec(regexp.QuoteMeta(`INSERT INTO "mediaitems"`)). + WillReturnResult(sqlmock.NewResult(1, 1)) + mock.ExpectCommit() + mock.ExpectBegin() + mock.ExpectExec(regexp.QuoteMeta(`UPDATE "mediaitems"`)). + WillReturnError(errors.New("violates unique constraint")) + mock.ExpectRollback() + }, + nil, + nil, + func(handler *Handler) func(ctx echo.Context) error { + return handler.UploadMediaItems + }, + http.StatusConflict, + "mediaitem already exists", + }, { "upload mediaitems with error saving hash in mediaitem process", http.MethodPost, diff --git a/api/pkg/storage/storage.go b/api/pkg/storage/storage.go index 0077804..88ba351 100644 --- a/api/pkg/storage/storage.go +++ b/api/pkg/storage/storage.go @@ -1,6 +1,7 @@ package storage import ( + "errors" "os" "github.com/minio/minio-go/v7" @@ -51,15 +52,15 @@ func Init(cfg *Config) Provider { //nolint: ireturn } } err := os.Mkdir(cfg.Root+"/originals", dirPermission) - if err != nil { + if err != nil && !errors.Is(err, os.ErrExist) { slog.Error("error creating storage originals directory", "error", err) } err = os.Mkdir(cfg.Root+"/previews", dirPermission) - if err != nil { + if err != nil && !errors.Is(err, os.ErrExist) { slog.Error("error creating storage previews directory", "error", err) } err = os.Mkdir(cfg.Root+"/thumbnails", dirPermission) - if err != nil { + if err != nil && !errors.Is(err, os.ErrExist) { slog.Error("error creating storage thumbnails directory", "error", err) } return &Disk{Root: cfg.Root} From 0262323cdbdff9badf0acb6f82011023aef029ce Mon Sep 17 00:00:00 2001 From: prabhuomkar Date: Sat, 22 Jun 2024 11:17:28 +0530 Subject: [PATCH 2/3] WIP test fixes --- api/internal/auth/auth.go | 12 +++++++++--- ml/requirements.txt | 1 + tests/features/environment.py | 2 ++ worker/requirements.txt | 1 + worker/src/providers/search/pytorch.py | 13 ------------- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/api/internal/auth/auth.go b/api/internal/auth/auth.go index 6e3cb53..4c526b1 100644 --- a/api/internal/auth/auth.go +++ b/api/internal/auth/auth.go @@ -7,6 +7,7 @@ import ( "errors" "time" + "github.com/go-redis/redis/v8" "github.com/golang-jwt/jwt/v4" uuid "github.com/satori/go.uuid" "golang.org/x/exp/slog" @@ -69,7 +70,9 @@ func RefreshTokens(cfg *config.Config, cache cache.Provider, refreshToken string func RemoveTokens(cache cache.Provider, accessToken string) error { refreshToken, err := cache.Get(accessToken) if err != nil { - slog.Error("error getting access token from cache", "error", err) + if !errors.Is(err, redis.Nil) { + slog.Error("error getting access token from cache", "error", err) + } return err } @@ -82,8 +85,11 @@ func RemoveTokens(cache cache.Provider, accessToken string) error { // VerifyToken ... func VerifyToken(cfg *config.Config, cache cache.Provider, accessToken string) (*TokenClaims, error) { - if _, err := cache.Get(accessToken); err != nil { - slog.Error("error getting access token from cache", "error", err) + _, err := cache.Get(accessToken) + if err != nil { + if !errors.Is(err, redis.Nil) { + slog.Error("error getting access token from cache", "error", err) + } return nil, err } diff --git a/ml/requirements.txt b/ml/requirements.txt index 42232cf..fc4a873 100644 --- a/ml/requirements.txt +++ b/ml/requirements.txt @@ -1,4 +1,5 @@ facenet-pytorch==2.5.3 +numpy==1.26.4 opencv-python==4.10.0.84 Pillow==10.3.0 paddle2onnx==1.2.1 diff --git a/tests/features/environment.py b/tests/features/environment.py index 9e7926a..88980b7 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -1,4 +1,5 @@ import psycopg2 +import time ALL_TABLES = ['thing_mediaitems', 'place_mediaitems', @@ -10,6 +11,7 @@ def before_feature(context, feature): cleanup_tables() def after_feature(context, feature): + time.sleep(10) cleanup_tables() def cleanup_tables(): diff --git a/worker/requirements.txt b/worker/requirements.txt index 2768aad..c275abb 100644 --- a/worker/requirements.txt +++ b/worker/requirements.txt @@ -5,6 +5,7 @@ grpcio==1.64.1 grpcio-tools==1.64.1 minio==7.2.7 moviepy==1.0.3 +numpy==1.26.4 opencv-python==4.10.0.84 prometheus-client==0.20.0 Pillow==10.3.0 diff --git a/worker/src/providers/search/pytorch.py b/worker/src/providers/search/pytorch.py index ba453b2..8fc858c 100644 --- a/worker/src/providers/search/pytorch.py +++ b/worker/src/providers/search/pytorch.py @@ -3,7 +3,6 @@ from PIL import Image import torch -from moviepy.editor import VideoFileClip from transformers import AutoTokenizer, AutoImageProcessor @@ -34,16 +33,4 @@ def generate_embedding(self, input_type: str, data: any): logging.debug(f'generated photo embedding: {res}') return [res] return [] - if data and data['type'] == 'video': - result = [] - video_clip = VideoFileClip(data['previewPath']) - for frame in video_clip.iter_frames(fps=video_clip.fps): - input_tensor = self.processor(Image.open(frame), return_tensors='pt') - res = self.vision_module.forward(**input_tensor) - if res is not None: - res = res.tolist() - result += [res] - video_clip.reader.close() - logging.debug(f'generated video embedding: {result}') - return result return [] From 9c69f95eb650788d701010cea38f579fdc3904f3 Mon Sep 17 00:00:00 2001 From: prabhuomkar Date: Sat, 22 Jun 2024 15:49:10 +0530 Subject: [PATCH 3/3] Fixes for correcting logs, enhancing tests --- api/config/config.go | 4 ++-- api/internal/handlers/albums.go | 2 +- api/internal/handlers/auth.go | 2 +- api/internal/handlers/sharing.go | 2 +- api/internal/jobs/jobs.go | 8 +++++++- docs/docs/user-guide/features/search.md | 2 +- docs/docs/user-guide/features/things.md | 2 +- tests/features/albums.feature | 2 +- tests/features/environment.py | 1 - tests/features/jobs.feature | 2 +- tests/features/library.feature | 2 +- tests/features/mediaitems.feature | 10 +++++----- tests/features/places.feature | 2 +- tests/features/search.feature | 8 +++++--- tests/features/sharing.feature | 2 +- tests/features/steps/mediaitems.py | 5 +++-- tests/features/steps/search.py | 11 ++++++++--- tests/features/things.feature | 2 +- worker/src/providers/faces/pytorch.py | 2 ++ 19 files changed, 43 insertions(+), 28 deletions(-) diff --git a/api/config/config.go b/api/config/config.go index 18414e1..c18a9dd 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -67,11 +67,11 @@ type ( Faces bool `envconfig:"SMRITI_ML_FACES" default:"true"` PlacesProvider string `envconfig:"SMRITI_ML_PLACES_PROVIDER" default:"openstreetmap"` ClassificationProvider string `envconfig:"SMRITI_ML_CLASSIFICATION_PROVIDER" default:"pytorch"` - ClassificationParams string `envconfig:"SMRITI_ML_CLASSIFICATION_PARAMS" default:"{\"file\":\"classification_v240508.pt\"}"` + ClassificationParams string `envconfig:"SMRITI_ML_CLASSIFICATION_PARAMS" default:"{\"file\":\"classification_v240615.pt\"}"` OCRProvider string `envconfig:"SMRITI_ML_OCR_PROVIDER" default:"paddlepaddle"` OCRParams string `envconfig:"SMRITI_ML_OCR_PARAMS" default:"{\"det_model_dir\":\"det_onnx\",\"rec_model_dir\":\"rec_onnx\",\"cls_model_dir\":\"cls_onnx\"}"` SearchProvider string `envconfig:"SMRITI_ML_SEARCH_PROVIDER" default:"pytorch"` - SearchParams string `envconfig:"SMRITI_ML_SEARCH_PARAMS" default:"{\"tokenizer_dir\":\"search_tokenizer\",\"processor_dir\":\"search_processor\",\"text_file\":\"search_text_v240508.pt\",\"vision_file\":\"search_vision_v240508.pt\"}"` //nolint:lll + SearchParams string `envconfig:"SMRITI_ML_SEARCH_PARAMS" default:"{\"tokenizer_dir\":\"search_tokenizer\",\"processor_dir\":\"search_processor\",\"text_file\":\"search_text_v240615.pt\",\"vision_file\":\"search_vision_v240615.pt\"}"` //nolint:lll FacesProvider string `envconfig:"SMRITI_ML_FACES_PROVIDER" default:"pytorch"` FacesParams string `envconfig:"SMRITI_ML_FACES_PARAMS" default:"{\"minutes\":\"1\",\"face_threshold\":\"0.9\",\"model\":\"vggface2\",\"clustering\":\"annoy\"}"` PreviewThumbnailParams string `envconfig:"SMRITI_ML_PREVIEW_THUMBNAIL_PARAMS" default:"{\"thumbnail_size\":\"512\"}"` diff --git a/api/internal/handlers/albums.go b/api/internal/handlers/albums.go index b087ccd..0a3c944 100644 --- a/api/internal/handlers/albums.go +++ b/api/internal/handlers/albums.go @@ -133,10 +133,10 @@ func (h *Handler) GetAlbum(ctx echo.Context) error { Preload("CoverMediaItem"). First(&album) if result.Error != nil { - slog.Error("error getting album", "error", result.Error) if errors.Is(result.Error, gorm.ErrRecordNotFound) { return echo.NewHTTPError(http.StatusNotFound, "album not found") } + slog.Error("error getting album", "error", result.Error) return echo.NewHTTPError(http.StatusInternalServerError, result.Error.Error()) } return ctx.JSON(http.StatusOK, album) diff --git a/api/internal/handlers/auth.go b/api/internal/handlers/auth.go index af1b063..838d771 100644 --- a/api/internal/handlers/auth.go +++ b/api/internal/handlers/auth.go @@ -37,10 +37,10 @@ func (h *Handler) Login(ctx echo.Context) error { Where("username=? AND password=?", &loginRequest.Username, getPasswordHash(*loginRequest.Password)). First(&user) if result.Error != nil { - slog.Error("error getting user", "error", result.Error) if errors.Is(result.Error, gorm.ErrRecordNotFound) { return echo.NewHTTPError(http.StatusNotFound, "incorrect username or password") } + slog.Error("error getting user", "error", result.Error) return echo.NewHTTPError(http.StatusInternalServerError, result.Error.Error()) } accessToken, refreshToken, err := auth.GetTokens(h.Config, h.Cache, user) diff --git a/api/internal/handlers/sharing.go b/api/internal/handlers/sharing.go index 9de24f3..865331a 100644 --- a/api/internal/handlers/sharing.go +++ b/api/internal/handlers/sharing.go @@ -41,10 +41,10 @@ func (h *Handler) GetSharedAlbum(ctx echo.Context) error { Preload("CoverMediaItem"). First(&sharedAlbum) if result.Error != nil { - slog.Error("error getting shared album", "error", result.Error) if errors.Is(result.Error, gorm.ErrRecordNotFound) { return echo.NewHTTPError(http.StatusNotFound, "shared link not found") } + slog.Error("error getting shared album", "error", result.Error) return echo.NewHTTPError(http.StatusInternalServerError, result.Error.Error()) } return ctx.JSON(http.StatusOK, sharedAlbum) diff --git a/api/internal/jobs/jobs.go b/api/internal/jobs/jobs.go index 8073a60..8a45c24 100644 --- a/api/internal/jobs/jobs.go +++ b/api/internal/jobs/jobs.go @@ -6,6 +6,7 @@ import ( "api/pkg/services/worker" "api/pkg/storage" "context" + "errors" "fmt" "log/slog" "strconv" @@ -110,6 +111,9 @@ func (j *Job) getJobMediaItem(jobCfg models.Job, lastMediaItemID uuid.UUID) (mod mediaItem := models.MediaItem{} result := j.DB.Where("user_id=? AND id>?", jobCfg.UserID, lastMediaItemID).Order("created_at").First(&mediaItem) if result.Error != nil { + if errors.Is(result.Error, gorm.ErrRecordNotFound) { + return mediaItem, result.Error + } slog.Error("error getting job mediaitem", "error", result.Error) return mediaItem, result.Error } @@ -246,7 +250,9 @@ func (j *Job) getPayload(jobCfg models.Job, mediaItem models.MediaItem) map[stri payload["mimeType"] = mediaItem.MimeType payload["type"] = string(mediaItem.MediaItemType) payload["exifdata"] = *mediaItem.EXIFData - payload["keywords"] = *mediaItem.Keywords + if mediaItem.Keywords != nil { + payload["keywords"] = *mediaItem.Keywords + } return payload } diff --git a/docs/docs/user-guide/features/search.md b/docs/docs/user-guide/features/search.md index ce5fa13..b517b8e 100644 --- a/docs/docs/user-guide/features/search.md +++ b/docs/docs/user-guide/features/search.md @@ -3,7 +3,7 @@ Enable this feature using API Configuration: ```bash SMRITI_ML_SEARCH: true SMRITI_ML_SEARCH_PROVIDER: pytorch -SMRITI_ML_SEARCH_PARAMS: {"tokenizer_dir":"search_tokenizer","processor_dir":"search_processor","text_file":"search_text_v240508.pt","vision_file":"search_vision_v240508.pt"} +SMRITI_ML_SEARCH_PARAMS: {"tokenizer_dir":"search_tokenizer","processor_dir":"search_processor","text_file":"search_text_v240615.pt","vision_file":"search_vision_v240615.pt"} ``` ## Use Cases diff --git a/docs/docs/user-guide/features/things.md b/docs/docs/user-guide/features/things.md index e2fb64d..468f868 100644 --- a/docs/docs/user-guide/features/things.md +++ b/docs/docs/user-guide/features/things.md @@ -4,7 +4,7 @@ Enable this feature using API Configuration: SMRITI_FEATURE_THINGS: true SMRITI_ML_CLASSIFICATION: true SMRITI_ML_CLASSIFICATION_PROVIDER: pytorch -SMRITI_ML_CLASSIFICATION_PARAMS: {"file":"classification_v240508.pt"} +SMRITI_ML_CLASSIFICATION_PARAMS: {"file":"classification_v240615.pt"} ``` ## Use Cases diff --git a/tests/features/albums.feature b/tests/features/albums.feature index 6f4ea6a..2cc0a88 100644 --- a/tests/features/albums.feature +++ b/tests/features/albums.feature @@ -4,7 +4,7 @@ Feature: Albums Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists Scenario: Validate Create Album diff --git a/tests/features/environment.py b/tests/features/environment.py index 88980b7..b9df28b 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -11,7 +11,6 @@ def before_feature(context, feature): cleanup_tables() def after_feature(context, feature): - time.sleep(10) cleanup_tables() def cleanup_tables(): diff --git a/tests/features/jobs.feature b/tests/features/jobs.feature index 47b35a8..a90484c 100644 --- a/tests/features/jobs.feature +++ b/tests/features/jobs.feature @@ -5,7 +5,7 @@ Feature: Jobs Then user jobs is created When user jobs logs in Then token is generated - When upload jobs.jpeg photo mediaitem with auth + When upload jobs.jpeg photo mediaitem with auth and wait 5 seconds Then mediaitem is uploaded When update jobs user with auth Then user jobs is updated diff --git a/tests/features/library.feature b/tests/features/library.feature index 7f34d46..b7a3101 100644 --- a/tests/features/library.feature +++ b/tests/features/library.feature @@ -4,7 +4,7 @@ Feature: Library Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists When get mediaitem with auth Then mediaitem is present diff --git a/tests/features/mediaitems.feature b/tests/features/mediaitems.feature index 4457341..0e61f3a 100644 --- a/tests/features/mediaitems.feature +++ b/tests/features/mediaitems.feature @@ -7,9 +7,9 @@ Feature: MediaItems Scenario: Validate Create Photo MediaItem Given there are no mediaitems - When upload default photo mediaitem without auth + When upload default photo mediaitem without auth and wait 0 seconds Then auth error is found - When upload default photo mediaitem with auth + When upload default photo mediaitem with auth and wait 5 seconds Then mediaitem is uploaded When get mediaitem without auth Then auth error is found @@ -37,7 +37,7 @@ Feature: MediaItems Scenario: Validate Duplicate Photo MediaItem Given a mediaitem exists - When upload default photo mediaitem with auth + When upload default photo mediaitem with auth and wait 0 seconds Then mediaitem already exists Scenario: Validate Delete Photo MediaItem @@ -57,9 +57,9 @@ Feature: MediaItems Scenario: Validate Create Video MediaItem Given there are no mediaitems - When upload default video mediaitem without auth + When upload default video mediaitem without auth and wait 0 seconds Then auth error is found - When upload default video mediaitem with auth + When upload default video mediaitem with auth and wait 30 seconds Then mediaitem is uploaded When get mediaitem without auth Then auth error is found diff --git a/tests/features/places.feature b/tests/features/places.feature index d411a97..6e736a7 100644 --- a/tests/features/places.feature +++ b/tests/features/places.feature @@ -4,7 +4,7 @@ Feature: Places Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists Scenario: Validate Places diff --git a/tests/features/search.feature b/tests/features/search.feature index 6be3a17..6ca1a2c 100644 --- a/tests/features/search.feature +++ b/tests/features/search.feature @@ -4,12 +4,14 @@ Feature: Search Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists Scenario: Search MediaItems Given a mediaitem exists - When search for mediaitems without auth + When search query pizza for mediaitems without auth Then auth error is found - When search for mediaitems with auth + When search query pizza for mediaitems with auth Then searched mediaitem is present in list + When search query man playing guitar for mediaitems with auth + Then searched mediaitem is not present in list diff --git a/tests/features/sharing.feature b/tests/features/sharing.feature index a80e11d..cdb38ec 100644 --- a/tests/features/sharing.feature +++ b/tests/features/sharing.feature @@ -4,7 +4,7 @@ Feature: Sharing Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists Scenario: Validate Create Shared Album diff --git a/tests/features/steps/mediaitems.py b/tests/features/steps/mediaitems.py index 5b81d69..498338b 100644 --- a/tests/features/steps/mediaitems.py +++ b/tests/features/steps/mediaitems.py @@ -77,8 +77,8 @@ def step_impl(context): assert field not in context.mediaitem assert context.mediaitem['message'] == 'mediaitem not found' -@when('upload {name} {type} mediaitem {condition} auth') -def step_impl(context, name, type, condition): +@when('upload {name} {type} mediaitem {condition} auth and wait {seconds} seconds') +def step_impl(context, name, type, condition, seconds): headers = None if condition == 'with': headers = {'Authorization': f'Bearer {context.access_token}'} @@ -86,6 +86,7 @@ def step_impl(context, name, type, condition): res = requests.post(API_URL+'/v1/mediaItems', files=files, headers=headers) context.response = res context.mediaitem_type = type + time.sleep(int(seconds)) @when('upload {name} {type} mediaitem with auth if does not exist and wait {seconds} seconds') def step_impl(context, name, type, seconds): diff --git a/tests/features/steps/search.py b/tests/features/steps/search.py index 2328d45..16ff47c 100644 --- a/tests/features/steps/search.py +++ b/tests/features/steps/search.py @@ -4,12 +4,12 @@ from common import API_URL -@when('search for mediaitems {condition} auth') -def step_impl(context, condition): +@when('search query {query} for mediaitems {condition} auth') +def step_impl(context, query, condition): headers = None if condition == 'with': headers = {'Authorization': f'Bearer {context.access_token}'} - res = requests.get(API_URL+'/v1/search?q=pizza', headers=headers) + res = requests.get(API_URL+'/v1/search?q='+query, headers=headers) context.response = res context.mediaitems = res.json() @@ -17,3 +17,8 @@ def step_impl(context, condition): def step_impl(context): assert len(context.mediaitems) == 1 assert context.mediaitem_id == context.mediaitems[0]['id'] + +@then('searched mediaitem is not present in list') +def step_impl(context): + assert len(context.mediaitems) == 0 + assert context.mediaitem_id not in [mediaitem['id'] for mediaitem in context.mediaitems] diff --git a/tests/features/things.feature b/tests/features/things.feature index 4d81681..1b1c4a7 100644 --- a/tests/features/things.feature +++ b/tests/features/things.feature @@ -4,7 +4,7 @@ Feature: Things Given a user default is created if does not exist When user default logs in Then token is generated - When upload default photo mediaitem with auth if does not exist and wait 10 seconds + When upload default photo mediaitem with auth if does not exist and wait 5 seconds Then mediaitem is uploaded or exists Scenario: Validate Things diff --git a/worker/src/providers/faces/pytorch.py b/worker/src/providers/faces/pytorch.py index 7a60b90..f17e441 100644 --- a/worker/src/providers/faces/pytorch.py +++ b/worker/src/providers/faces/pytorch.py @@ -14,6 +14,8 @@ def __init__(self, params: dict) -> None: os.environ['TORCH_HOME'] = '/' try: os.symlink('/models/faces/', '/checkpoints') + except FileExistsError: + pass except Exception as exp: logging.error(f'error creating symlink: {str(exp)}') self.prob_threshold = float(params['face_threshold'])