diff --git a/Dockerfile b/Dockerfile index 140b6e8951d3e..7cb5627edafa9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,11 +8,12 @@ ARG CPANMOPTS= ###################### # Base modperl image stage ###################### -FROM bitnami/minideb:buster AS modperl +FROM debian:bullseye AS modperl # Install cpm to install cpanfile dependencies RUN --mount=type=cache,id=apt-cache,target=/var/cache/apt set -x && \ - install_packages \ + apt update && \ + apt install -y \ apache2 \ apt-utils \ cpanminus \ @@ -73,6 +74,8 @@ RUN --mount=type=cache,id=apt-cache,target=/var/cache/apt set -x && \ # # Action::Retry libmath-fibonacci-perl \ + # EV - event loop + libev-perl \ # Algorithm::CheckDigits libprobe-perl-perl \ # CLDR::Number @@ -137,7 +140,6 @@ RUN --mount=type=cache,id=apt-cache,target=/var/cache/apt set -x && \ libtest-number-delta-perl \ libdevel-size-perl \ gnumeric \ - incron \ # for dev # gnu readline libreadline-dev \ @@ -203,10 +205,6 @@ RUN \ chown www-data:www-data -R /var/log # Install Product Opener from the workdir COPY --chown=www-data:www-data . /opt/product-opener/ -RUN \ - # www-data user shall be able to use incron - echo www-data >> /etc/incron.allow && \ - incrontab -u www-data /opt/product-opener/conf/incron.conf EXPOSE 80 COPY ./docker/docker-entrypoint.sh / diff --git a/Makefile b/Makefile index 53fb9fbda9a84..0d5518e8c8f43 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,8 @@ HOSTS=127.0.0.1 world.productopener.localhost fr.productopener.localhost static. 
DOCKER_COMPOSE=docker-compose --env-file=${ENV_FILE} # we run tests in a specific project name to be separated from dev instances # we also publish mongodb on a separate port to avoid conflicts -DOCKER_COMPOSE_TEST=COMPOSE_PROJECT_NAME=po_test PO_COMMON_PREFIX=test_ MONGO_EXPOSE_PORT=27027 docker-compose --env-file=${ENV_FILE} +# we also enable the possibility to fake services in po_test_runner +DOCKER_COMPOSE_TEST=ROBOTOFF_URL="http://backend:8881/" GOOGLE_CLOUD_VISION_API_URL="http://backend:8881/" COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME}_test PO_COMMON_PREFIX=test_ MONGO_EXPOSE_PORT=27027 docker-compose --env-file=${ENV_FILE} .DEFAULT_GOAL := dev @@ -238,7 +239,8 @@ integration_test: @echo "🥫 Running unit tests …" # we launch the server and run tests within same container # we also need dynamicfront for some assets to exists - ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront +# this is the place where variables are important + ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront incron # note: we need the -T option for ci (non tty environment) ${DOCKER_COMPOSE_TEST} exec -T backend prove -l -r tests/integration ${DOCKER_COMPOSE_TEST} stop @@ -258,7 +260,7 @@ test-unit: guard-test # usage: make test-int test=test-name.t test-int: guard-test # usage: make test-one test=test-file.t @echo "🥫 Running test: 'tests/integration/${test}' …" - ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront + ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront incron ${DOCKER_COMPOSE_TEST} exec backend perl tests/integration/${test} # better shutdown, for if we do a modification of the code, we need a restart ${DOCKER_COMPOSE_TEST} stop backend @@ -267,9 +269,13 @@ test-int: guard-test # usage: make test-one test=test-file.t stop_tests: ${DOCKER_COMPOSE_TEST} stop +# clean tests, remove containers and volume (useful if you changed env variables, etc.) 
+clean_tests: + ${DOCKER_COMPOSE_TEST} down -v --remove-orphans + update_tests_results: @echo "🥫 Updated expected test results with actuals for easy Git diff" - ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront + ${DOCKER_COMPOSE_TEST} up -d memcached postgres mongodb backend dynamicfront incron ${DOCKER_COMPOSE_TEST} exec -T -w /opt/product-opener/tests backend bash update_tests_results.sh ${DOCKER_COMPOSE_TEST} stop diff --git a/conf/apache.conf b/conf/apache.conf index 2b34b1430f90d..573c43cc4f697 100644 --- a/conf/apache.conf +++ b/conf/apache.conf @@ -21,6 +21,7 @@ PerlPassEnv EVENTS_PASSWORD PerlPassEnv REDIS_URL PerlPassEnv MONGODB_HOST PerlPassEnv GOOGLE_CLOUD_VISION_API_KEY +PerlPassEnv GOOGLE_CLOUD_VISION_API_URL PerlPassEnv CROWDIN_PROJECT_IDENTIFIER PerlPassEnv CROWDIN_PROJECT_KEY PerlPassEnv GEOLITE2_PATH diff --git a/cpanfile b/cpanfile index 256ded815dd7f..466a819381ebd 100644 --- a/cpanfile +++ b/cpanfile @@ -84,6 +84,10 @@ requires 'Log::Any::Adapter::Log4perl', '>= 0.09'; # liblog-any-adapter-log4perl requires 'Action::CircuitBreaker'; requires 'Action::Retry'; # deps: libmath-fibonacci-perl +# AnyEvent +requires 'AnyEvent'; +requires 'AnyEvent::Inotify::Simple'; + on 'test' => sub { requires 'Test::More', '>= 1.302186, < 2.0'; requires 'Test::MockModule'; @@ -100,6 +104,8 @@ on 'test' => sub { requires 'Devel::Cover'; requires 'Devel::Cover::Report::Codecov'; requires 'Devel::Cover::Report::Codecovbash'; + requires 'Test::Fake::HTTPD'; + requires 'URL::Encode'; }; on 'develop' => sub { diff --git a/docker-compose.yml b/docker-compose.yml index d7f6b5b441a3f..446907c8ad681 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,6 +20,7 @@ x-backend-conf: &backend-conf - EVENTS_PASSWORD - REDIS_URL - GOOGLE_CLOUD_VISION_API_KEY + - GOOGLE_CLOUD_VISION_API_URL - CROWDIN_PROJECT_IDENTIFIER - CROWDIN_PROJECT_KEY - GEOLITE2_PATH @@ -85,10 +86,8 @@ services: - "$MINION_QUEUE" incron: <<: *backend-conf - # This service 
run the incron jobs - # Only root can run incron - user: root - command: ["incrond", "-n"] + # This service watch for new images to trigger ocr and robotoff processing + command: ["perl", "scripts/run_cloud_vision_ocr.pl", "/mnt/podata/new_images"] frontend: image: ghcr.io/openfoodfacts/openfoodfacts-server/frontend:${TAG} depends_on: diff --git a/lib/ProductOpener/APITest.pm b/lib/ProductOpener/APITest.pm index 0e4f616d92d11..8b64b86300e49 100644 --- a/lib/ProductOpener/APITest.pm +++ b/lib/ProductOpener/APITest.pm @@ -52,6 +52,7 @@ BEGIN { &wait_dynamic_front &execute_api_tests &wait_server + &fake_http_server ); # symbols to export on request %EXPORT_TAGS = (all => [@EXPORT_OK]); } @@ -61,6 +62,7 @@ use vars @EXPORT_OK; use ProductOpener::TestDefaults qw/:all/; use ProductOpener::Test qw/:all/; use ProductOpener::Mail qw/ $LOG_EMAIL_START $LOG_EMAIL_END /; +use ProductOpener::Store qw/store retrieve/; use Test::More; use LWP::UserAgent; @@ -71,6 +73,7 @@ use JSON::PP; use Carp qw/confess/; use Clone qw/clone/; use File::Tail; +use Test::Fake::HTTPD; # Constants of the test website main domain and url # Should be used internally only (see: construct_test_url to build urls in tests) @@ -153,6 +156,8 @@ Return a user agent sub new_client () { my $jar = HTTP::CookieJar::LWP->new; my $ua = LWP::UserAgent->new(cookie_jar => $jar); + # set a neutral user-agent, for it may appear in some results + $ua->agent("Product-opener-tests/1.0"); return $ua; } @@ -594,7 +599,7 @@ Especially we replace "3D=" for "=" and join line and their continuation =head4 $mail text of mail =head3 Returns -Reformated text +Reformatted text =cut sub mail_to_text ($mail) { @@ -620,10 +625,11 @@ ref to an array of lines of the email sub normalize_mail_for_comparison ($mail) { # remove boundaries + $DB::single = 1; my $text = mail_to_text($mail); - my @boundaries = $text =~ m/boundary="([^"]+)"/g; + my @boundaries = $text =~ m/boundary=([^ ,\n\t]+)/g; foreach my $boundary (@boundaries) { - $text 
=~ s/$boundary/\\"--boundary--\\"/g; + $text =~ s/$boundary/boundary/g; } # replace generic dates $text =~ s/\d\d\d\d-\d\d-\d\d/--date--/g; @@ -634,4 +640,74 @@ sub normalize_mail_for_comparison ($mail) { return \@lines; } +=head2 fake_http_server($port, $dump_path, $responses_ref) { + +Launch a fake HTTP server. + +We use that to simulate Robotoff or any HTTP API in integration tests. +As it will be launched on the local backend container, we have to pretend +those services' URL is on C<http://backend:$port/>. + +You can provide a list of responses to simulate real service responses, +while requests sent are stored for later checks by the tests. + +=head3 parameters + +=head4 $dump_path - path + +A temporary directory to dump requests + +You can retrieve requests in this directory as C<req-N.sto> + +=head4 $responses_ref - ref to a list + +List of responses to send, in right order, for each received request. + +If the number of requests exceeds this list, +we will send simple 200 HTTP responses with a json payload. + +=head3 returns ref to fake server + +Hold the reference until you don't need the server + +=cut + +sub fake_http_server ($port, $dump_path, $responses_ref) { + + # dump responses + my $resp_num = 0; + foreach my $resp (@$responses_ref) { + store("$dump_path/resp-$resp_num.sto", $resp); + $resp_num += 1; + } + + my $httpd = Test::Fake::HTTPD->new( + timeout => 1000, + listen => 10, + host => "0.0.0.0", + port => $port, + ); + + $httpd->run( + sub { + my $req = shift; + my @dumped_reqs = glob("$dump_path/req-*.sto"); + my $num_req = scalar @dumped_reqs; + # dump request to the folder + store("$dump_path/req-$num_req.sto", $req); + # look for an eventual response + my $response_ref; + if (-e "$dump_path/resp-$num_req.sto") { + $response_ref = retrieve("$dump_path/resp-$num_req.sto"); + } + else { + # an ok response + $response_ref = HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"foo": "blah"}'); + } + return $response_ref; + } + ); + return $httpd; +} + 1; diff --git 
a/lib/ProductOpener/Config2_docker.pm b/lib/ProductOpener/Config2_docker.pm index a863633bb2de6..f822c1ef44eed 100755 --- a/lib/ProductOpener/Config2_docker.pm +++ b/lib/ProductOpener/Config2_docker.pm @@ -43,6 +43,7 @@ BEGIN { $mongodb_timeout_ms $memd_servers $google_cloud_vision_api_key + $google_cloud_vision_api_url $crowdin_project_identifier $crowdin_project_key $robotoff_url @@ -86,6 +87,8 @@ $mongodb_timeout_ms = 50000; # config option max_time_ms/maxTimeMS $memd_servers = ["memcached:11211"]; $google_cloud_vision_api_key = $ENV{GOOGLE_CLOUD_VISION_API_KEY}; +$google_cloud_vision_api_url = $ENV{GOOGLE_CLOUD_VISION_API_URL} || "https://vision.googleapis.com/v1/images:annotate"; + $crowdin_project_identifier = $ENV{CROWDIN_PROJECT_IDENTIFIER}; $crowdin_project_key = $ENV{CROWDIN_PROJECT_KEY}; diff --git a/lib/ProductOpener/Config_obf.pm b/lib/ProductOpener/Config_obf.pm index 63ff4deb2f1f1..7a6c76f5ae44f 100644 --- a/lib/ProductOpener/Config_obf.pm +++ b/lib/ProductOpener/Config_obf.pm @@ -43,6 +43,7 @@ BEGIN { $producers_email $google_cloud_vision_api_key + $google_cloud_vision_api_url $crowdin_project_identifier $crowdin_project_key @@ -196,6 +197,7 @@ $conf_root = $ProductOpener::Config2::conf_root; $geolite2_path = $ProductOpener::Config2::geolite2_path; $google_cloud_vision_api_key = $ProductOpener::Config2::google_cloud_vision_api_key; +$google_cloud_vision_api_url = $ProductOpener::Config2::google_cloud_vision_api_url; $crowdin_project_identifier = $ProductOpener::Config2::crowdin_project_identifier; $crowdin_project_key = $ProductOpener::Config2::crowdin_project_key; diff --git a/lib/ProductOpener/Config_off.pm b/lib/ProductOpener/Config_off.pm index e5f7497b48b84..f8b448083a7b1 100644 --- a/lib/ProductOpener/Config_off.pm +++ b/lib/ProductOpener/Config_off.pm @@ -42,6 +42,7 @@ BEGIN { $producers_email $google_cloud_vision_api_key + $google_cloud_vision_api_url $crowdin_project_identifier $crowdin_project_key @@ -343,6 +344,7 @@ $conf_root = 
$ProductOpener::Config2::conf_root; $geolite2_path = $ProductOpener::Config2::geolite2_path; $google_cloud_vision_api_key = $ProductOpener::Config2::google_cloud_vision_api_key; +$google_cloud_vision_api_url = $ProductOpener::Config2::google_cloud_vision_api_url; $crowdin_project_identifier = $ProductOpener::Config2::crowdin_project_identifier; $crowdin_project_key = $ProductOpener::Config2::crowdin_project_key; diff --git a/lib/ProductOpener/Config_opf.pm b/lib/ProductOpener/Config_opf.pm index b90ef572b7e1a..8961a119f490a 100644 --- a/lib/ProductOpener/Config_opf.pm +++ b/lib/ProductOpener/Config_opf.pm @@ -43,6 +43,7 @@ BEGIN { $producers_email $google_cloud_vision_api_key + $google_cloud_vision_api_url $crowdin_project_identifier $crowdin_project_key @@ -194,6 +195,7 @@ $conf_root = $ProductOpener::Config2::conf_root; $geolite2_path = $ProductOpener::Config2::geolite2_path; $google_cloud_vision_api_key = $ProductOpener::Config2::google_cloud_vision_api_key; +$google_cloud_vision_api_url = $ProductOpener::Config2::google_cloud_vision_api_url; $crowdin_project_identifier = $ProductOpener::Config2::crowdin_project_identifier; $crowdin_project_key = $ProductOpener::Config2::crowdin_project_key; diff --git a/lib/ProductOpener/Config_opff.pm b/lib/ProductOpener/Config_opff.pm index f31ba123b8590..14b70e5481011 100644 --- a/lib/ProductOpener/Config_opff.pm +++ b/lib/ProductOpener/Config_opff.pm @@ -43,6 +43,7 @@ BEGIN { $producers_email $google_cloud_vision_api_key + $google_cloud_vision_api_url $crowdin_project_identifier $crowdin_project_key @@ -193,6 +194,7 @@ $conf_root = $ProductOpener::Config2::conf_root; $geolite2_path = $ProductOpener::Config2::geolite2_path; $google_cloud_vision_api_key = $ProductOpener::Config2::google_cloud_vision_api_key; +$google_cloud_vision_api_url = $ProductOpener::Config2::google_cloud_vision_api_url; $crowdin_project_identifier = $ProductOpener::Config2::crowdin_project_identifier; $crowdin_project_key = 
$ProductOpener::Config2::crowdin_project_key; diff --git a/lib/ProductOpener/Images.pm b/lib/ProductOpener/Images.pm index 46701638194cb..7d9dcb9f31c80 100644 --- a/lib/ProductOpener/Images.pm +++ b/lib/ProductOpener/Images.pm @@ -56,10 +56,8 @@ Same image saved with a maximum width and height of 100 and 400 pixels. Those th OCR output from Google Cloud Vision. -When a new image is uploaded, a symbolic link to it is created in /new_images. This triggers a script to generate and save the OCR: - -incrontab -l -u off -/srv/off/new_images IN_ATTRIB,IN_CREATE,IN_MOVED_TO /srv/off/scripts/process_new_image_off.sh $@/$# +When a new image is uploaded, a symbolic link to it is created in /new_images. +This triggers a script to generate and save the OCR: C<scripts/run_cloud_vision_ocr.pl>. =item [front|ingredients|nutrition|packaging]_[2 letter language code].[product revision].[full|100|200|400].jpg @@ -111,6 +109,11 @@ BEGIN { &display_image_thumb &extract_text_from_image + &send_image_to_cloud_vision + &send_image_to_robotoff + + @CLOUD_VISION_FEATURES_FULL + @CLOUD_VISION_FEATURES_TEXT ); # symbols to export on request %EXPORT_TAGS = (all => [@EXPORT_OK]); @@ -621,7 +624,7 @@ Format: [front|ingredients|nutrition|packaging|other]_[2 letter language code] =head4 Comment $comment -=head4 Reference to an imgid $img_id +=head4 Reference to an image id $img_id Used to return the number identifying the image to the caller. @@ -1996,93 +1999,198 @@ sub extract_text_from_image ($product_ref, $id, $field, $ocr_engine, $results_re else { $log->warn("no available tesseract dictionary", {lc => $lc, lan => $lan, id => $id}) if $log->is_warn(); } - } elsif ($ocr_engine eq 'google_cloud_vision') { - my $url = "https://vision.googleapis.com/v1/images:annotate?key=" - . 
$ProductOpener::Config::google_cloud_vision_api_key; + my $json_file = "$www_root/images/products/$path/$filename.json"; + open(my $gv_logs, ">>:encoding(UTF-8)", "$data_root/logs/cloud_vision.log"); + my $cloudvision_ref = send_image_to_cloud_vision($image, $json_file, \@CLOUD_VISION_FEATURES_TEXT, $gv_logs); + close $gv_logs; - my $ua = LWP::UserAgent->new(); + if ( (defined $cloudvision_ref->{responses}) + and (defined $cloudvision_ref->{responses}[0]) + and (defined $cloudvision_ref->{responses}[0]{fullTextAnnotation}) + and (defined $cloudvision_ref->{responses}[0]{fullTextAnnotation}{text})) + { - open(my $IMAGE, "<", $image) || die "Could not read $image: $!\n"; - binmode($IMAGE); - local $/; - my $image_data = do {local $/; <$IMAGE>}; # https://www.perlmonks.org/?node_id=287647 - close $IMAGE; + $log->debug("text found in google cloud vision response") if $log->is_debug(); - my $api_request_ref = { - requests => [ - { - features => [{type => 'TEXT_DETECTION'}], - # image => { source => { imageUri => $image_url}} - image => {content => encode_base64($image_data)} - } - ] - }; - my $json = encode_json($api_request_ref); + $results_ref->{$field} = $cloudvision_ref->{responses}[0]{fullTextAnnotation}{text}; + $results_ref->{$field . 
"_annotations"} = $cloudvision_ref; + $results_ref->{status} = 0; + $product_ref->{images}{$id}{ocr} = 1; + $product_ref->{images}{$id}{orientation} + = compute_orientation_from_cloud_vision_annotations($cloudvision_ref); + } + else { + $product_ref->{images}{$id}{ocr} = 0; + } + } + return; +} - my $request = HTTP::Request->new(POST => $url); - $request->header('Content-Type' => 'application/json'); - $request->content($json); +@CLOUD_VISION_FEATURES_FULL = ( + {type => 'TEXT_DETECTION'}, + {type => 'LOGO_DETECTION'}, + {type => 'LABEL_DETECTION'}, + {type => 'SAFE_SEARCH_DETECTION'}, + {type => 'FACE_DETECTION'}, +); - my $res = $ua->request($request); - # $log->info("google cloud vision response", { json_response => $res->decoded_content, api_token => $ProductOpener::Config::google_cloud_vision_api_key }); +@CLOUD_VISION_FEATURES_TEXT = ({type => 'TEXT_DETECTION'}); - if ($res->is_success) { +=head2 send_image_to_cloud_vision ($image_path, $json_file, $features_ref, $gv_logs) - $log->info("request to google cloud vision was successful") if $log->is_info(); +Call the Google Cloud Vision API - open(my $OUT, ">>:encoding(UTF-8)", "$data_root/logs/cloud_vision.log"); - print $OUT "success\t" . $image_url . "\t" . $res->code . "\n"; - close $OUT; +=head3 Arguments - my $json_response = $res->decoded_content; +=head4 $image_path - str path to image - my $cloudvision_ref = decode_json($json_response); +=head4 $json_file - str path to the file where we will store OCR result as JSON - my $json_file = "$www_root/images/products/$path/$filename.json"; +=head4 $features_ref - array reference - the "features" parameter of Google Cloud Vision - $log->info("saving google cloud vision json response to file", {path => $json_file}) if $log->is_info(); +This determines which detections will be performed. +Remember that each feature has a cost. 
- # UTF-8 issue , see https://stackoverflow.com/questions/4572007/perl-lwpuseragent-mishandling-utf-8-response - $json_response = decode("utf8", $json_response); +C<@CLOUD_VISION_FEATURES_FULL> and C<@CLOUD_VISION_FEATURES_TEXT> are two constants you can use. - open($OUT, ">:encoding(UTF-8)", $json_file); - print $OUT $json_response; - close $OUT; +=head4 $gv_logs - file handle - if ( (defined $cloudvision_ref->{responses}) - and (defined $cloudvision_ref->{responses}[0]) - and (defined $cloudvision_ref->{responses}[0]{fullTextAnnotation}) - and (defined $cloudvision_ref->{responses}[0]{fullTextAnnotation}{text})) - { +A file where we write additional logs, specific to the service. - $log->debug("text found in google cloud vision response") if $log->is_debug(); +=head3 Response - $results_ref->{$field} = $cloudvision_ref->{responses}[0]{fullTextAnnotation}{text}; - $results_ref->{$field . "_annotations"} = $cloudvision_ref; - $results_ref->{status} = 0; - $product_ref->{images}{$id}{ocr} = 1; - $product_ref->{images}{$id}{orientation} - = compute_orientation_from_cloud_vision_annotations($cloudvision_ref); - } - else { - $product_ref->{images}{$id}{ocr} = 0; +Return the decoded JSON content of the response, or undef if the request failed. + +=cut + +sub send_image_to_cloud_vision ($image_path, $json_file, $features_ref, $gv_logs) { + + my $url + = $ProductOpener::Config::google_cloud_vision_api_url . "?key=" + . 
$ProductOpener::Config::google_cloud_vision_api_key; + print($gv_logs "CV:sending to $url\n"); + + my $ua = LWP::UserAgent->new(); + + open(my $IMAGE, "<", $image_path) || die "Could not read $image_path: $!\n"; + binmode($IMAGE); + local $/; + my $image_data = do {local $/; <$IMAGE>}; # https://www.perlmonks.org/?node_id=287647 + close $IMAGE; + + my $api_request_ref = { + requests => [ + { + features => $features_ref, + # image => { source => { imageUri => $image_url}} + image => {content => encode_base64($image_data)}, } + ] + }; + my $json = encode_json($api_request_ref); + + my $request = HTTP::Request->new(POST => $url); + $request->header('Content-Type' => 'application/json'); + $request->content($json); + + my $cloud_vision_response = $ua->request($request); + # $log->info("google cloud vision response", { json_response => $cloud_vision_response->decoded_content, api_token => $ProductOpener::Config::google_cloud_vision_api_key }); + + my $cloudvision_ref = undef; + if ($cloud_vision_response->is_success) { + + $log->info("request to google cloud vision was successful for $image_path") if $log->is_info(); + + my $json_response = $cloud_vision_response->decoded_content(charset => 'UTF-8'); + + $cloudvision_ref = decode_json($json_response); + + $log->info("saving google cloud vision json response to file", {path => $json_file}) if $log->is_info(); + # UTF-8 issue , see https://stackoverflow.com/questions/4572007/perl-lwpuseragent-mishandling-utf-8-response + $json_response = decode("utf8", $json_response); + + if (open(my $OUT, ">:encoding(UTF-8)", $json_file)) { + print($OUT $json_response); + close($OUT); + + print($gv_logs "--> cloud vision success for $image_path\n"); } else { - $log->warn("google cloud vision request not successful", {code => $res->code, response => $res->message}) - if $log->is_warn(); - - open(my $OUT, ">>:encoding(UTF-8)", "$data_root/logs/cloud_vision.log"); - print $OUT "error\t" . $image_url . "\t" . $res->code . "\t" . 
$res->message . "\n"; - close $OUT; + $log->error("Cannot write $json_file: $!\n"); + print($gv_logs "Cannot write $json_file: $!\n"); } + } + else { + $log->warn( + "google cloud vision request not successful", + { + code => $cloud_vision_response->code, + image_path => $image_path, + response => $cloud_vision_response->message + } + ) if $log->is_warn(); + print $gv_logs "error\t" + . $image_path . "\t" + . $cloud_vision_response->code . "\t" + . $cloud_vision_response->message . "\n"; + } + return $cloudvision_ref; - return; +} + +=head2 send_image_to_robotoff ($code, $image_url, $json_url, $api_server_domain) + +Send a notification about a new image (already gone through OCR) to Robotoff + +=head3 Arguments + +=head4 $code - product code + +=head4 $image_url - public url of the image + +=head4 $json_url - public url of OCR result as JSON + +=head4 $api_server_domain - the API url for this product opener instance + +=head3 Response + +Return Robotoff HTTP::Response object. + +=cut + +sub send_image_to_robotoff ($code, $image_url, $json_url, $api_server_domain) { + + my $ua = LWP::UserAgent->new(); + + my $robotoff_response = $ua->post( + $robotoff_url . 
"/api/v1/images/import", + { + 'barcode' => $code, + 'image_url' => $image_url, + 'ocr_url' => $json_url, + 'server_domain' => $api_server_domain, + } + ); + + if ($robotoff_response->is_success) { + $log->info("request to robotoff was successful") if $log->is_info(); + } + else { + $log->warn( + "robotoff request not successful", + { + code => $robotoff_response->code, + response => $robotoff_response->message, + status_line => $robotoff_response->status_line + } + ) if $log->is_warn(); + } + return $robotoff_response; } 1; diff --git a/lib/ProductOpener/Test.pm b/lib/ProductOpener/Test.pm index a6be075be5439..65875e4f3ebfa 100644 --- a/lib/ProductOpener/Test.pm +++ b/lib/ProductOpener/Test.pm @@ -49,6 +49,7 @@ BEGIN { &remove_all_users &remove_all_orgs &check_not_production + &wait_for ); # symbols to export on request %EXPORT_TAGS = (all => [@EXPORT_OK]); } @@ -84,7 +85,7 @@ There are two modes: one to update expected results, and one to test against the =head3 Parameters =head4 String $filepath -The path of the file containing the tetst. +The path of the file containing the test. Generally should be
__FILE__
 within the test. @@ -169,6 +170,10 @@ sub remove_all_products () { if (@$err) { confess("not able to remove some products directories: " . join(":", @$err)); } + remove_tree("$www_root/images/products", {keep_root => 1, error => \$err}); + if (@$err) { + confess("not able to remove some products directories: " . join(":", @$err)); + } } =head2 remove_all_users () @@ -266,7 +271,7 @@ sub ensure_expected_results_dir ($expected_results_dir, $update_expected_results return 1; } -=head2 compare_to_expected_results($object_ref, $expected_results_file, $update_expected_results) { +=head2 compare_to_expected_results($object_ref, $expected_results_file, $update_expected_results, $test_ref = undef) { Compare an object (e.g. product data or an API result) to expected results. @@ -287,7 +292,7 @@ and the new expected results can be diffed / committed in GitHub. =head4 $test_ref - an optional reference to an object describing the test case -If the test fail, the test reference will be output in the diag +If the test fails, the test reference will be output in the C<diag> =cut @@ -645,4 +650,38 @@ sub normalize_org_for_test_comparison ($org_ref) { return; } +=head2 wait_for($code, $timeout=3, $poll_time=1) +Wait for an event to happen, up to a certain amount of time + +=head3 parameters + +=head4 $code - sub + +This must be the code that checks for the event and returns a true value if it succeeds, false otherwise + +=head4 $timeout - float + +how many seconds to wait (default 3s) + +=head4 $poll_time - float + +how much time to wait between checks (default 1s) + +=cut + +sub wait_for ($code, $timeout = 3, $poll_time = 1) { + my $spent_time = 0; + my $success = undef; + while ((!$success) && $spent_time < $timeout) { + $success = $code->(); + if ($success) { + return 1; + } + sleep $poll_time; + $spent_time += $poll_time; + } + # last try + return $code->(); +} + 1; diff --git a/scripts/process_new_image_off.sh b/scripts/process_new_image_off.sh deleted file mode 100755 index 
ab004b96d31e8..0000000000000 --- a/scripts/process_new_image_off.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh - -# script to process images with cloud vision and to call computer vision -# algorithm -# to be executed through incron: -# incrontab -e -u off -# /srv/off/new_images IN_CREATE /srv/off/scripts/process_new_image_off.sh $@/$# - - -if [ -z "$PERL5LIB" ] && [ -f /tmp/env-export.sh ] -then - # we are in docker and we need all env variables (as incron remove envs for users) - . /tmp/env-export.sh -fi - -export PERL5LIB="/srv/off/lib/:${PERL5LIB}" -DEFAULT_MOUNT_PATH=/srv/off -MOUNT_PATH="${2:-$DEFAULT_MOUNT_PATH}" - -${MOUNT_PATH}/scripts/run_cloud_vision_ocr.pl $1 - - diff --git a/scripts/run_cloud_vision_ocr.pl b/scripts/run_cloud_vision_ocr.pl index 7e4b1e6c2ef8e..68376a3a952f4 100755 --- a/scripts/run_cloud_vision_ocr.pl +++ b/scripts/run_cloud_vision_ocr.pl @@ -22,151 +22,115 @@ # This script is meant to be called through process_new_image_off.sh, itself run through an icrontab -use Modern::Perl '2017'; -use utf8; +use ProductOpener::PerlStandards -binmode(STDOUT, ":encoding(UTF-8)"); + binmode(STDOUT, ":encoding(UTF-8)"); use ProductOpener::Config qw/:all/; -use ProductOpener::Store qw/:all/; - -use CGI qw/:cgi :form escapeHTML/; -use URI::Escape::XS; -use Storable qw/dclone/; -use Encode; -use JSON::PP; -use LWP::UserAgent; -use MIME::Base64; +use ProductOpener::Images qw/:all/; + +use AnyEvent::Inotify::Simple; use Log::Any qw($log); use Log::Any::Adapter 'TAP'; - -# 1551113262.3302748614028.front_fr.30.jpg +use EV; open(my $LOG, ">>", "$data_root/logs/run_cloud_vision_ocr.log"); -my $file = $ARGV[0]; - -my $destination = readlink $file; +sub send_file_to_ocr ($file) { + my $destination = readlink $file; -if (not defined $destination) { - $log->error("Error: destination is not a valid symlink to an image file", - {file => $file, destination => $destination}) - if $log->is_error(); - print $LOG "ERROR: file: $file -> destination: $destination is not a 
valid symlink to an image file\n"; - exit(); -} + if (not defined $destination) { + $log->error("Error: destination is not a valid symlink to an image file", {file => $file}) + if $log->is_error(); + print $LOG "ERROR: file: $file is not a valid symlink to an image file\n"; + return; + } -my $code; + # compute arguments -if ($file =~ /^([^\.]*)\.(\d+)\./) { - $code = $2; -} + my $code; -my $path = $destination; -$path =~ s/.*\/images/\/images/; + if ($file =~ /^([^\.]*)\.(\d+)\./) { + $code = $2; + } -my $auth = ""; -if ($server_domain =~ /^dev\./) { - $auth = "off:off@"; -} + my $path = $destination; + $path =~ s/.*\/images/\/images/; -my $image_url = "https://" . $auth . "static." . $server_domain . $path; -my $json_url = $image_url; -$json_url =~ s/\.([^\.]+)$//; -$json_url .= ".json"; - -my $json_file = $destination; -$json_file =~ s/\.([^\.]+)$//; -$json_file .= ".json"; - -print $LOG "file: $file destination: $destination code: $code image_url: $image_url json_file: $json_file\n"; - -my $url = "https://vision.googleapis.com/v1/images:annotate?key=" . $ProductOpener::Config::google_cloud_vision_api_key; -# alpha-vision.googleapis.com/ - -my $ua = LWP::UserAgent->new(); - -open(my $IMAGE, "<", $file) || die "Could not read $file: $!\n"; -binmode($IMAGE); -local $/; -my $image = <$IMAGE>; -close $IMAGE; - -my $api_request_ref = { - requests => [ - { - features => [ - {type => 'TEXT_DETECTION'}, - {type => 'LOGO_DETECTION'}, - {type => 'LABEL_DETECTION'}, - {type => 'SAFE_SEARCH_DETECTION'}, - {type => 'FACE_DETECTION'} - ], - image => {content => encode_base64($image)} - } - ] -}; -my $json = encode_json($api_request_ref); + my $auth = ""; + if ($server_domain =~ /^dev\./) { + $auth = "off:off@"; + } -my $request = HTTP::Request->new(POST => $url); -$request->header('Content-Type' => 'application/json'); -$request->content($json); + my $image_url = "https://" . $auth . "static." . $server_domain . 
$path; + my $json_url = $image_url; + $json_url =~ s/\.([^\.]+)$//; + $json_url .= ".json"; -my $cloud_vision_response = $ua->request($request); + my $json_file = $destination; + $json_file =~ s/\.([^\.]+)$//; + $json_file .= ".json"; -if ($cloud_vision_response->is_success) { + print $LOG "file: $file destination: $destination code: $code image_url: $image_url json_file: $json_file\n"; + open(my $gv_logs, ">>:encoding(UTF-8)", "$data_root/logs/cloud_vision.log"); - $log->info("request to google cloud vision was successful") if $log->is_info(); + my $cloudvision_ref = send_image_to_cloud_vision($file, $json_file, \@CLOUD_VISION_FEATURES_FULL, $gv_logs); - my $json_response = $cloud_vision_response->decoded_content(charset => 'UTF-8'); + if (defined $cloudvision_ref) { - # my $cloudvision_ref = decode_json($json_response); + # Call robotoff to process the image and/or json from Cloud Vision + my $robotoff_response = send_image_to_robotoff($code, $image_url, $json_url, $auth . "api." . $server_domain); + if ($robotoff_response->is_success) { + print $LOG "--> robotoff success: " . $robotoff_response->decoded_content . "\n"; + } + else { + print $LOG "--> robotoff error: " . $robotoff_response->status_line . 
"\n"; + } - # UTF-8 issue , see https://stackoverflow.com/questions/4572007/perl-lwpuseragent-mishandling-utf-8-response - $json_response = decode("utf8", $json_response); + unlink($file); + } + return; +} - open(my $OUT, ">:encoding(UTF-8)", $json_file) - or die("Cannot write $json_file: $!\n"); - print $OUT $json_response; - close $OUT; +sub robust_send_file_to_ocr ($file) { + eval {send_file_to_ocr($file);}; + if ($@) { + $log->error("send_file_to_ocr failed for $file: $@") if $log->is_error(); + } + return; +} - print $LOG "--> cloud vision success\n"; +sub run ($images_dir) { + my $inotify = AnyEvent::Inotify::Simple->new( + directory => $images_dir, + wanted_events => [qw(create move)], + event_receiver => sub { + my ($event, $file, $moved_to) = @_; + if ($event eq 'create') { + robust_send_file_to_ocr("$images_dir/$file"); + } + }, + ); - # Call robotoff to process the image and/or json from Cloud Vision + # call event loop + EV::run(); + return; +} - my $robotoff_response = $ua->post( - $robotoff_url . "/api/v1/images/import", - { - 'barcode' => $code, - 'image_url' => $image_url, - 'ocr_url' => $json_url, - 'server_domain' => $auth . "api." . $server_domain - } - ); +sub main() { + # first argument is the directory to watch + my $images_dir = $ARGV[0]; - if ($robotoff_response->is_success) { - $log->info("request to robotoff was successful") if $log->is_info(); - print $LOG "--> robotoff success: " . $robotoff_response->decoded_content . "\n"; - } - else { - $log->warn( - "robotoff request not successful", - { - code => $robotoff_response->code, - response => $robotoff_response->message, - status_line => $robotoff_response->status_line - } - ) if $log->is_warn(); - print $LOG "--> robotoff error: " . $robotoff_response->status_line . 
"\n"; + # signal handler for TERM, KILL, QUIT + foreach my $sig (qw/TERM KILL QUIT/) { + EV::signal $sig, sub { + print "Exiting after receiving $sig"; + exit(0); + }; } - unlink($file); -} -else { - $log->warn("google cloud vision request not successful", - {code => $cloud_vision_response->code, response => $cloud_vision_response->message}) - if $log->is_warn(); - print $LOG "--> cloud vision error: $cloud_vision_response->code $cloud_vision_response->message\n"; + run($images_dir); + return; } -close $LOG; +main(); diff --git a/stop_words.txt b/stop_words.txt index a05a3e4df634c..630dcbd3de4e5 100644 --- a/stop_words.txt +++ b/stop_words.txt @@ -13,6 +13,7 @@ api appid aromatisées arôme +backend backticks barcode barcodes diff --git a/taxonomies/nutrient_levels.txt b/taxonomies/nutrient_levels.txt index f35623980445e..29efd6045e32d 100644 --- a/taxonomies/nutrient_levels.txt +++ b/taxonomies/nutrient_levels.txt @@ -2231,4 +2231,4 @@ yi:זאלץ in high quantity yo:Iyọ̀ in high quantity za:Salt in high quantity zh:食盐在最高含量中 -zu:Salt in high quantity \ No newline at end of file +zu:Salt in high quantity diff --git a/tests/integration/expected_test_results/create_pro_user/mails.json b/tests/integration/expected_test_results/create_pro_user/mails.json index cc45ecb447ef9..55e3696e6a3d6 100644 --- a/tests/integration/expected_test_results/create_pro_user/mails.json +++ b/tests/integration/expected_test_results/create_pro_user/mails.json @@ -5,14 +5,14 @@ "From: \"Open Food Facts\" ", "To: \"Open Food Facts\" ", "Subject: New pro account for new org acme-inc - tests", - "Content-Type: multipart/alternative; boundary=\"\\\"--boundary--\\\"\"", + "Content-Type: multipart/alternative; boundary=boundary", "Content-Transfer-Encoding: 7bit", "", "", - "--\\\"--boundary--\\\"", + "--boundary", "Date: ***", "MIME-Version: 1.0", - "Content-Type: text/plain; charset=\"utf-8\"", + "Content-Type: text/plain; charset=utf-8", "Content-Transfer-Encoding: quoted-printable", "", "", @@ 
-75,10 +75,10 @@ "", "", "", - "--\\\"--boundary--\\\"", + "--boundary", "Date: ***", "MIME-Version: 1.0", - "Content-Type: text/html; charset=\"utf-8\"", + "Content-Type: text/html; charset=utf-8", "Content-Transfer-Encoding: quoted-printable", "", "", @@ -141,12 +141,12 @@ "", "", "", - "--\\\"--boundary--\\\"--" + "--boundary--" ], [ "Date: ***", "MIME-Version: 1.0", - "Content-Type: text/plain; charset=\"utf-8\"", + "Content-Type: text/plain; charset=utf-8", "Content-Transfer-Encoding: quoted-printable", "From: \"Open Food Facts\" ", "To: Test ", @@ -179,7 +179,7 @@ [ "Date: ***", "MIME-Version: 1.0", - "Content-Type: text/plain; charset=\"utf-8\"", + "Content-Type: text/plain; charset=utf-8", "Content-Transfer-Encoding: quoted-printable", "From: \"Open Food Facts\" ", "To: \"Open Food Facts\" ", diff --git a/tests/integration/expected_test_results/create_pro_user/user-after-subscription.json b/tests/integration/expected_test_results/create_pro_user/user-after-subscription.json index 316bfb98037a0..07bdd28a2c288 100644 --- a/tests/integration/expected_test_results/create_pro_user/user-after-subscription.json +++ b/tests/integration/expected_test_results/create_pro_user/user-after-subscription.json @@ -6,7 +6,7 @@ "encrypted_password" : "--ignore--", "initial_cc" : "world", "initial_lc" : "en", - "initial_user_agent" : "libwww-perl/6.36", + "initial_user_agent" : "Product-opener-tests/1.0", "ip" : "--ignore--", "name" : "Test", "newsletter" : "", diff --git a/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_data.json b/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_data.json new file mode 100644 index 0000000000000..1f8d1d822dd56 --- /dev/null +++ b/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_data.json @@ -0,0 +1,3 @@ +{ + "ocr" : "success" +} diff --git a/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_request_body.json 
b/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_request_body.json new file mode 100644 index 0000000000000..6dd2b18ad3862 --- /dev/null +++ b/tests/integration/expected_test_results/run_cloud_vision_ocr/ocr_request_body.json @@ -0,0 +1,26 @@ +{ + "requests" : [ + { + "features" : [ + { + "type" : "TEXT_DETECTION" + }, + { + "type" : "LOGO_DETECTION" + }, + { + "type" : "LABEL_DETECTION" + }, + { + "type" : "SAFE_SEARCH_DETECTION" + }, + { + "type" : "FACE_DETECTION" + } + ], + "image" : { + "content" : "/9j/4AAQSkZJRgABAQAAAQABAAD/4QJsRXhpZgAASUkqAAgAAAAKAAsAAgAOAAAAhgAAAAABCQAB\nAAAABAAAAAEBCQABAAAABAAAABIBCQABAAAAAQAAABoBCQABAAAASAAAABsBCQABAAAASAAAACgB\nCQABAAAAAgAAADIBAgAUAAAAlAAAABMCCQABAAAAAQAAAGmHBAABAAAAqAAAAPYAAABnVGh1bWIg\nMy4xMi4yADIwMjM6MDM6MTYgMTg6MjM6MzkABgAAkAcABAAAADAyMjEBkQcABAAAAAECAwAAoAcA\nBAAAADAxMDABoAkAAQAAAAEAAAACoAkAAQAAAAQAAAADoAkAAQAAAAQAAAAAAAAABgADAQMAAQAA\nAAYAAAAaAQkAAQAAAEgAAAAbAQkAAQAAAEgAAAAoAQkAAQAAAAIAAAABAgQAAQAAAEQBAAACAgQA\nAQAAAB8BAAAAAAAA/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcH\nBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4I\nCA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/\nwAARCAAEAAQDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAj/xAAUEAEAAAAAAAAAAAAA\nAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAABAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMB\nAAIRAxEAPwCoAE0t/9kA/9sAQwAFAwQEBAMFBAQEBQUFBgcMCAcHBwcPCwsJDBEPEhIRDxERExYc\nFxMUGhURERghGBodHR8fHxMXIiQiHiQcHh8e/9sAQwEFBQUHBgcOCAgOHhQRFB4eHh4eHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e/8AAEQgABAAEAwEiAAIRAQMR\nAf/EABUAAQEAAAAAAAAAAAAAAAAAAAAI/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/EABUBAQEAAAAA\nAAAAAAAAAAAAAAQF/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEQMRAD8AqABNLf/Z\n" + } + } + ] +} diff --git a/tests/integration/expected_test_results/run_cloud_vision_ocr/robotoff_request_body.json b/tests/integration/expected_test_results/run_cloud_vision_ocr/robotoff_request_body.json new file mode 
100644 index 0000000000000..ac5e47428a9ea --- /dev/null +++ b/tests/integration/expected_test_results/run_cloud_vision_ocr/robotoff_request_body.json @@ -0,0 +1,6 @@ +{ + "barcode" : "3000000000001", + "image_url" : "https://static.openfoodfacts.localhost/images/products/300/000/000/0001/2.jpg", + "ocr_url" : "https://static.openfoodfacts.localhost/images/products/300/000/000/0001/2.json", + "server_domain" : "api.openfoodfacts.localhost" +} diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/1.100.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.100.jpg new file mode 100644 index 0000000000000..211364722432d Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.100.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/1.400.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.400.jpg new file mode 100644 index 0000000000000..010d5f4d951d9 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.400.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg new file mode 100644 index 0000000000000..c739f4d84ae59 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg.orig b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg.orig new file mode 100644 index 0000000000000..1b5a691e87d25 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.jpg.orig differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/1.json b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.json new file mode 100644 index 0000000000000..28346040793dc --- /dev/null +++ 
b/tests/integration/inputs/sample-products-images/300/000/000/0001/1.json @@ -0,0 +1,75 @@ +{ + "responses": [ + { + "labelAnnotations": [ + { + "mid": "/m/07xgrh", + "description": "Ingredient", + "score": 0.86167634, + "topicality": 0.86167634 + }, + { + "mid": "/m/01ykh", + "description": "Cuisine", + "score": 0.8451197, + "topicality": 0.8451197 + }, + { + "mid": "/m/02wbm", + "description": "Food", + "score": 0.8418686, + "topicality": 0.8418686 + }, + { + "mid": "/m/02q08p0", + "description": "Dish", + "score": 0.81159383, + "topicality": 0.81159383 + }, + { + "mid": "/m/083vt", + "description": "Wood", + "score": 0.7561794, + "topicality": 0.7561794 + }, + { + "mid": "/m/0hwky", + "description": "Pattern", + "score": 0.7537876, + "topicality": 0.7537876 + }, + { + "mid": "/m/036qh8", + "description": "Produce", + "score": 0.7300633, + "topicality": 0.7300633 + }, + { + "mid": "/m/08tlbj", + "description": "Natural foods", + "score": 0.6109067, + "topicality": 0.6109067 + }, + { + "mid": "/m/04jdhxy", + "description": "Superfood", + "score": 0.60629594, + "topicality": 0.60629594 + }, + { + "mid": "/m/02t2zd", + "description": "Cobblestone", + "score": 0.59688926, + "topicality": 0.59688926 + } + ], + "safeSearchAnnotation": { + "adult": "VERY_UNLIKELY", + "spoof": "VERY_UNLIKELY", + "medical": "UNLIKELY", + "violence": "VERY_UNLIKELY", + "racy": "VERY_UNLIKELY" + } + } + ] +} diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.100.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.100.jpg new file mode 100644 index 0000000000000..3e429f3d43141 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.100.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.200.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.200.jpg new file mode 100644 index 
0000000000000..07bd66685d7f1 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.200.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.400.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.400.jpg new file mode 100644 index 0000000000000..07bd66685d7f1 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.400.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.full.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.full.jpg new file mode 100644 index 0000000000000..31745b7a21f0e Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.3.full.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.100.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.100.jpg new file mode 100644 index 0000000000000..3e429f3d43141 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.100.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.200.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.200.jpg new file mode 100644 index 0000000000000..07bd66685d7f1 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.200.jpg differ diff --git a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.400.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.400.jpg new file mode 100644 index 0000000000000..07bd66685d7f1 Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.400.jpg differ diff --git 
a/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.full.jpg b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.full.jpg new file mode 100644 index 0000000000000..31745b7a21f0e Binary files /dev/null and b/tests/integration/inputs/sample-products-images/300/000/000/0001/front_en.4.full.jpg differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/1.sto b/tests/integration/inputs/sample-products/300/000/000/0001/1.sto new file mode 100644 index 0000000000000..93be51c35c1a8 Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/1.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/2.sto b/tests/integration/inputs/sample-products/300/000/000/0001/2.sto new file mode 100644 index 0000000000000..e9a6bdef29a3b Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/2.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/3.sto b/tests/integration/inputs/sample-products/300/000/000/0001/3.sto new file mode 100644 index 0000000000000..8bad8ce28d579 Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/3.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/4.sto b/tests/integration/inputs/sample-products/300/000/000/0001/4.sto new file mode 100644 index 0000000000000..0cb0c50f779ac Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/4.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/5.sto b/tests/integration/inputs/sample-products/300/000/000/0001/5.sto new file mode 100644 index 0000000000000..82c22a67cefe4 Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/5.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/changes.sto 
b/tests/integration/inputs/sample-products/300/000/000/0001/changes.sto new file mode 100644 index 0000000000000..5c0f500bb2921 Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/changes.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/images.sto b/tests/integration/inputs/sample-products/300/000/000/0001/images.sto new file mode 100644 index 0000000000000..9e2fad5a9b1fe Binary files /dev/null and b/tests/integration/inputs/sample-products/300/000/000/0001/images.sto differ diff --git a/tests/integration/inputs/sample-products/300/000/000/0001/product.sto b/tests/integration/inputs/sample-products/300/000/000/0001/product.sto new file mode 120000 index 0000000000000..c4e921bcb5c88 --- /dev/null +++ b/tests/integration/inputs/sample-products/300/000/000/0001/product.sto @@ -0,0 +1 @@ +5.sto \ No newline at end of file diff --git a/tests/integration/inputs/small-img.jpg b/tests/integration/inputs/small-img.jpg new file mode 100644 index 0000000000000..1fffa7827b716 Binary files /dev/null and b/tests/integration/inputs/small-img.jpg differ diff --git a/tests/integration/run_cloud_vision_ocr.t b/tests/integration/run_cloud_vision_ocr.t new file mode 100644 index 0000000000000..35b732caf9ab4 --- /dev/null +++ b/tests/integration/run_cloud_vision_ocr.t @@ -0,0 +1,70 @@ +use ProductOpener::PerlStandards; + +use Test::More; +use Test::MockModule; +use HTTP::Headers; +use HTTP::Response; +use File::Basename "dirname"; +use File::Slurp; +use JSON; +use File::Temp (); +use File::Copy::Recursive qw(dircopy fcopy); +use Test::Fake::HTTPD qw/run_http_server/; +use URL::Encode qw/url_params_mixed/; + +use ProductOpener::Store qw/:all/; +use ProductOpener::Config qw/:all/; + +use ProductOpener::APITest qw/:all/; +use ProductOpener::Test qw/:all/; + +my ($test_id, $test_dir, $expected_result_dir, $update_expected_results) = (init_expected_results(__FILE__)); + +remove_all_products(); +wait_application_ready(); + 
+# use a very small image, to avoid an overly large request JSON object +my $sample_products_path = dirname(__FILE__) . "/inputs/sample-products/"; +my $sample_products_images_path = dirname(__FILE__) . "/inputs/sample-products-images/"; +my $product_code_path = "300/000/000/0001"; +my $input_image_path = dirname(__FILE__) . "/inputs/small-img.jpg"; + +# Note: we can't use a full test uploading an image through the server right now, +# because I don't have the time :-D +# add a sample product +dircopy("$sample_products_path/$product_code_path", "$data_root/products/$product_code_path"); +my $image_dir = "$www_root/images/products/$product_code_path"; +dircopy("$sample_products_images_path/$product_code_path", $image_dir); +# add an image +fcopy($input_image_path, "$image_dir/2.jpg"); +# fake responses for OCR and robotoff +my @responses = ( + HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"ocr": "success"}'), + HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"robotoff": "success"}'), +); +my $dump_path = File::Temp->newdir(); +# start fake server +my $httpd = fake_http_server(8881, $dump_path, \@responses); +# link image - this should trigger the script +symlink("$image_dir/2.jpg", "$data_root/new_images/" . time() . "." . 
"3000000000001.other.2.jpg"); +# wait until we got a response or fail +ok(wait_for(sub {return (-e "$dump_path/req-1.sto");}, 5), "OCR and robotoff called"); +$httpd = undef; # stop server +# verify it's done +my @requests = glob("$dump_path/req-*.sto"); +is(scalar @requests, 2, "Two request issued"); +my $ocr_request = retrieve("$dump_path/req-0.sto"); +my $request_json_body = decode_json($ocr_request->content()); +compare_to_expected_results($request_json_body, "$expected_result_dir/ocr_request_body.json", $update_expected_results); +my $ocr_content = read_file("$image_dir/2.json"); +ok($ocr_content, "OCR file is not empty"); +my $ocr_data = decode_json($ocr_content); +compare_to_expected_results($ocr_data, "$expected_result_dir/ocr_data.json", $update_expected_results); +my $robotoff_request = retrieve("$dump_path/req-1.sto"); +# we have url encoded parameters, and order might change --> convert to hash +my $request_content = url_params_mixed($robotoff_request->content()); +compare_to_expected_results($request_content, "$expected_result_dir/robotoff_request_body.json", + $update_expected_results); + +done_testing(); + diff --git a/tests/unit/expected_test_results/ingredients/fr-marmelade.json b/tests/unit/expected_test_results/ingredients/fr-marmelade.json index 8646a4cc365b2..13b5942b65cca 100644 --- a/tests/unit/expected_test_results/ingredients/fr-marmelade.json +++ b/tests/unit/expected_test_results/ingredients/fr-marmelade.json @@ -311,11 +311,11 @@ "en:palm-fat" ], "en:vegan-status-unknown" : [ - "en:orange-marmalade", + "en:orange-marmalade", "en:sodium-citrate" ], "en:vegetarian-status-unknown" : [ - "en:orange-marmalade", + "en:orange-marmalade", "en:sodium-citrate" ] }, @@ -326,71 +326,73 @@ ], "ingredients_hierarchy" : [ "en:orange-marmalade", - "en:chocolate", - "en:wheat-flour", - "en:cereal", - "en:flour", - "en:wheat", - "en:cereal-flour", - "en:sugar", - "en:added-sugar", - "en:disaccharide", - "en:egg", - "en:glucose-fructose-syrup", - 
"en:monosaccharide", - "en:fructose", - "en:glucose", - "en:colza-oil", - "en:oil-and-fat", - "en:vegetable-oil-and-fat", - "en:rapeseed-oil", - "en:raising-agent", - "en:salt","en:emulsifier", - "en:orange-pulp", - "en:fruit", - "en:citrus-fruit", - "en:orange", - "en:concentrated-orange-juice", - "en:fruit-juice", - "en:orange-juice", - "en:gelling-agent", - "en:acid", - "en:acidity-regulator", - "en:natural-orange-flavouring", - "en:flavouring", - "en:natural-flavouring", - "en:thickener", - "en:cocoa-paste", - "en:cocoa", - "en:cocoa-butter", - "en:illipe-oil", - "en:vegetable-fat", - "en:mango-kernel-oil", - "en:vegetable-oil", - "en:shorea-robusta-seed-oil", - "en:shea-butter", - "en:palm-fat", - "en:palm-oil-and-fat", - "en:lactose-and-milk-proteins", - "en:protein", - "en:animal-protein", - "en:milk-proteins", - "en:lactose", - "en:e503ii", - "en:e503", - "en:e450i", - "en:e450", - "en:e500ii", - "en:e500", - "en:soya-lecithin", - "en:e322", - "en:e322i", - "en:e440a", - "en:e330","en:e333", - "en:sodium-citrate", - "en:minerals", - "en:sodium", - "en:e415" + "en:chocolate", + "en:wheat-flour", + "en:cereal", + "en:flour", + "en:wheat", + "en:cereal-flour", + "en:sugar", + "en:added-sugar", + "en:disaccharide", + "en:egg", + "en:glucose-fructose-syrup", + "en:monosaccharide", + "en:fructose", + "en:glucose", + "en:colza-oil", + "en:oil-and-fat", + "en:vegetable-oil-and-fat", + "en:rapeseed-oil", + "en:raising-agent", + "en:salt", + "en:emulsifier", + "en:orange-pulp", + "en:fruit", + "en:citrus-fruit", + "en:orange", + "en:concentrated-orange-juice", + "en:fruit-juice", + "en:orange-juice", + "en:gelling-agent", + "en:acid", + "en:acidity-regulator", + "en:natural-orange-flavouring", + "en:flavouring", + "en:natural-flavouring", + "en:thickener", + "en:cocoa-paste", + "en:cocoa", + "en:cocoa-butter", + "en:illipe-oil", + "en:vegetable-fat", + "en:mango-kernel-oil", + "en:vegetable-oil", + "en:shorea-robusta-seed-oil", + "en:shea-butter", + "en:palm-fat", + 
"en:palm-oil-and-fat", + "en:lactose-and-milk-proteins", + "en:protein", + "en:animal-protein", + "en:milk-proteins", + "en:lactose", + "en:e503ii", + "en:e503", + "en:e450i", + "en:e450", + "en:e500ii", + "en:e500", + "en:soya-lecithin", + "en:e322", + "en:e322i", + "en:e440a", + "en:e330", + "en:e333", + "en:sodium-citrate", + "en:minerals", + "en:sodium", + "en:e415" ], "ingredients_n" : 41, "ingredients_n_tags" : [ @@ -465,9 +467,9 @@ "en:salt", "en:emulsifier", "en:orange-pulp", - "en:fruit", - "en:citrus-fruit", - "en:orange", + "en:fruit", + "en:citrus-fruit", + "en:orange", "en:concentrated-orange-juice", "en:fruit-juice", "en:orange-juice", diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data.json new file mode 100644 index 0000000000000..07046c2aa314e --- /dev/null +++ b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data.json @@ -0,0 +1,3 @@ +{ + "foo" : "blah" +} diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_2.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_2.json new file mode 100644 index 0000000000000..8dd659ba8030f --- /dev/null +++ b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_2.json @@ -0,0 +1,3 @@ +{ + "foo" : "bar" +} diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_3.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_3.json new file mode 100644 index 0000000000000..8dd659ba8030f --- /dev/null +++ b/tests/unit/expected_test_results/send_image_to_cloud_vision/ocr_data_3.json @@ -0,0 +1,3 @@ +{ + "foo" : "bar" +} diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body.json new file mode 100644 index 0000000000000..6dd2b18ad3862 --- /dev/null +++ 
b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body.json @@ -0,0 +1,26 @@ +{ + "requests" : [ + { + "features" : [ + { + "type" : "TEXT_DETECTION" + }, + { + "type" : "LOGO_DETECTION" + }, + { + "type" : "LABEL_DETECTION" + }, + { + "type" : "SAFE_SEARCH_DETECTION" + }, + { + "type" : "FACE_DETECTION" + } + ], + "image" : { + "content" : "/9j/4AAQSkZJRgABAQAAAQABAAD/4QJsRXhpZgAASUkqAAgAAAAKAAsAAgAOAAAAhgAAAAABCQAB\nAAAABAAAAAEBCQABAAAABAAAABIBCQABAAAAAQAAABoBCQABAAAASAAAABsBCQABAAAASAAAACgB\nCQABAAAAAgAAADIBAgAUAAAAlAAAABMCCQABAAAAAQAAAGmHBAABAAAAqAAAAPYAAABnVGh1bWIg\nMy4xMi4yADIwMjM6MDM6MTYgMTg6MjM6MzkABgAAkAcABAAAADAyMjEBkQcABAAAAAECAwAAoAcA\nBAAAADAxMDABoAkAAQAAAAEAAAACoAkAAQAAAAQAAAADoAkAAQAAAAQAAAAAAAAABgADAQMAAQAA\nAAYAAAAaAQkAAQAAAEgAAAAbAQkAAQAAAEgAAAAoAQkAAQAAAAIAAAABAgQAAQAAAEQBAAACAgQA\nAQAAAB8BAAAAAAAA/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcH\nBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4I\nCA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/\nwAARCAAEAAQDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAj/xAAUEAEAAAAAAAAAAAAA\nAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAABAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMB\nAAIRAxEAPwCoAE0t/9kA/9sAQwAFAwQEBAMFBAQEBQUFBgcMCAcHBwcPCwsJDBEPEhIRDxERExYc\nFxMUGhURERghGBodHR8fHxMXIiQiHiQcHh8e/9sAQwEFBQUHBgcOCAgOHhQRFB4eHh4eHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e/8AAEQgABAAEAwEiAAIRAQMR\nAf/EABUAAQEAAAAAAAAAAAAAAAAAAAAI/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/EABUBAQEAAAAA\nAAAAAAAAAAAAAAQF/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEQMRAD8AqABNLf/Z\n" + } + } + ] +} diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_2.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_2.json new file mode 100644 index 0000000000000..6dd2b18ad3862 --- /dev/null +++ b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_2.json @@ -0,0 +1,26 @@ +{ + "requests" : [ + { + 
"features" : [ + { + "type" : "TEXT_DETECTION" + }, + { + "type" : "LOGO_DETECTION" + }, + { + "type" : "LABEL_DETECTION" + }, + { + "type" : "SAFE_SEARCH_DETECTION" + }, + { + "type" : "FACE_DETECTION" + } + ], + "image" : { + "content" : "/9j/4AAQSkZJRgABAQAAAQABAAD/4QJsRXhpZgAASUkqAAgAAAAKAAsAAgAOAAAAhgAAAAABCQAB\nAAAABAAAAAEBCQABAAAABAAAABIBCQABAAAAAQAAABoBCQABAAAASAAAABsBCQABAAAASAAAACgB\nCQABAAAAAgAAADIBAgAUAAAAlAAAABMCCQABAAAAAQAAAGmHBAABAAAAqAAAAPYAAABnVGh1bWIg\nMy4xMi4yADIwMjM6MDM6MTYgMTg6MjM6MzkABgAAkAcABAAAADAyMjEBkQcABAAAAAECAwAAoAcA\nBAAAADAxMDABoAkAAQAAAAEAAAACoAkAAQAAAAQAAAADoAkAAQAAAAQAAAAAAAAABgADAQMAAQAA\nAAYAAAAaAQkAAQAAAEgAAAAbAQkAAQAAAEgAAAAoAQkAAQAAAAIAAAABAgQAAQAAAEQBAAACAgQA\nAQAAAB8BAAAAAAAA/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcH\nBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4I\nCA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/\nwAARCAAEAAQDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAj/xAAUEAEAAAAAAAAAAAAA\nAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAABAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMB\nAAIRAxEAPwCoAE0t/9kA/9sAQwAFAwQEBAMFBAQEBQUFBgcMCAcHBwcPCwsJDBEPEhIRDxERExYc\nFxMUGhURERghGBodHR8fHxMXIiQiHiQcHh8e/9sAQwEFBQUHBgcOCAgOHhQRFB4eHh4eHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e/8AAEQgABAAEAwEiAAIRAQMR\nAf/EABUAAQEAAAAAAAAAAAAAAAAAAAAI/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/EABUBAQEAAAAA\nAAAAAAAAAAAAAAQF/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEQMRAD8AqABNLf/Z\n" + } + } + ] +} diff --git a/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_3.json b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_3.json new file mode 100644 index 0000000000000..feae050e01c48 --- /dev/null +++ b/tests/unit/expected_test_results/send_image_to_cloud_vision/request_body_3.json @@ -0,0 +1,14 @@ +{ + "requests" : [ + { + "features" : [ + { + "type" : "TEXT_DETECTION" + } + ], + "image" : { + "content" : 
"/9j/4AAQSkZJRgABAQAAAQABAAD/4QJsRXhpZgAASUkqAAgAAAAKAAsAAgAOAAAAhgAAAAABCQAB\nAAAABAAAAAEBCQABAAAABAAAABIBCQABAAAAAQAAABoBCQABAAAASAAAABsBCQABAAAASAAAACgB\nCQABAAAAAgAAADIBAgAUAAAAlAAAABMCCQABAAAAAQAAAGmHBAABAAAAqAAAAPYAAABnVGh1bWIg\nMy4xMi4yADIwMjM6MDM6MTYgMTg6MjM6MzkABgAAkAcABAAAADAyMjEBkQcABAAAAAECAwAAoAcA\nBAAAADAxMDABoAkAAQAAAAEAAAACoAkAAQAAAAQAAAADoAkAAQAAAAQAAAAAAAAABgADAQMAAQAA\nAAYAAAAaAQkAAQAAAEgAAAAbAQkAAQAAAEgAAAAoAQkAAQAAAAIAAAABAgQAAQAAAEQBAAACAgQA\nAQAAAB8BAAAAAAAA/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcH\nBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4I\nCA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/\nwAARCAAEAAQDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAj/xAAUEAEAAAAAAAAAAAAA\nAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAABAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMB\nAAIRAxEAPwCoAE0t/9kA/9sAQwAFAwQEBAMFBAQEBQUFBgcMCAcHBwcPCwsJDBEPEhIRDxERExYc\nFxMUGhURERghGBodHR8fHxMXIiQiHiQcHh8e/9sAQwEFBQUHBgcOCAgOHhQRFB4eHh4eHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e/8AAEQgABAAEAwEiAAIRAQMR\nAf/EABUAAQEAAAAAAAAAAAAAAAAAAAAI/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/EABUBAQEAAAAA\nAAAAAAAAAAAAAAQF/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEQMRAD8AqABNLf/Z\n" + } + } + ] +} diff --git a/tests/unit/inputs/ocr-test.jpg b/tests/unit/inputs/ocr-test.jpg new file mode 100644 index 0000000000000..0cc6b68038c56 Binary files /dev/null and b/tests/unit/inputs/ocr-test.jpg differ diff --git a/tests/unit/inputs/small-img.jpg b/tests/unit/inputs/small-img.jpg new file mode 100644 index 0000000000000..1fffa7827b716 Binary files /dev/null and b/tests/unit/inputs/small-img.jpg differ diff --git a/tests/unit/send_image_to_cloud_vision.t b/tests/unit/send_image_to_cloud_vision.t new file mode 100644 index 0000000000000..4f483c823d176 --- /dev/null +++ b/tests/unit/send_image_to_cloud_vision.t @@ -0,0 +1,124 @@ +use ProductOpener::PerlStandards; + +use Test::More; +use Test::MockModule; +use File::Temp (); +use 
HTTP::Headers; +use HTTP::Response; +use File::Basename "dirname"; +use File::Slurp; +use JSON; + +use ProductOpener::Test qw/:all/; +use ProductOpener::Images qw/:all/; + +my ($test_id, $test_dir, $expected_result_dir, $update_expected_results) = (init_expected_results(__FILE__)); + +my @ua_requests = (); +# queue the responses for upcoming requests here; they are served first-in, first-out +my @ua_responses = (); +# fake replacement for the request method of LWP::UserAgent +sub fake_ua_request ($ua, $request_ref) { + push(@ua_requests, $request_ref); + return shift @ua_responses; +} + +# use a very small image, to avoid an overly large request JSON object +my $image_path = dirname(__FILE__) . "/inputs/small-img.jpg"; + +{ + my $user_agent_module = Test::MockModule->new('LWP::UserAgent'); + # mock request + $user_agent_module->mock('request', \&fake_ua_request); + my $tmp_dir = File::Temp->newdir(); + my $gv_logs_path = $tmp_dir->dirname . "gv.log"; + + # normal test + open(my $gv_logs, ">:encoding(UTF-8)", $gv_logs_path); + my $json_path = $tmp_dir . 
"/small-img.json"; + # expected response + my $response = HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"foo": "blah"}'); + push @ua_responses, $response; + send_image_to_cloud_vision($image_path, $json_path, \@CLOUD_VISION_FEATURES_FULL, $gv_logs); + close($gv_logs); + is(scalar @ua_requests, 1, "Normal test - One request issued to cloud vision"); + my $issued_request = shift @ua_requests; + my $request_json_body = decode_json($issued_request->content()); + compare_to_expected_results($request_json_body, "$expected_result_dir/request_body.json", $update_expected_results); + my $ocr_content = read_file($json_path); + ok($ocr_content, "normal test - OCR file is not empty"); + my $ocr_data = decode_json($ocr_content); + compare_to_expected_results($ocr_data, "$expected_result_dir/ocr_data.json", $update_expected_results); + my $logs = read_file($gv_logs_path); + like($logs, qr/cloud vision success/, "normal test - cloud vision success in logs"); + + # test new request updates + open($gv_logs, ">:encoding(UTF-8)", $gv_logs_path); + $response = HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"foo": "bar"}'); + push @ua_responses, $response; + send_image_to_cloud_vision($image_path, $json_path, \@CLOUD_VISION_FEATURES_FULL, $gv_logs); + close($gv_logs); + is(scalar @ua_requests, 1, "test request update - One request issued to cloud vision"); + $issued_request = shift @ua_requests; + $request_json_body = decode_json($issued_request->content()); + compare_to_expected_results($request_json_body, "$expected_result_dir/request_body_2.json", + $update_expected_results); + $ocr_content = read_file($json_path); + $ocr_data = decode_json($ocr_content); + compare_to_expected_results($ocr_data, "$expected_result_dir/ocr_data_2.json", $update_expected_results); + $logs = read_file($gv_logs_path); + like($logs, qr/cloud vision success/, "test request update - cloud vision success in logs"); + + # test with different feature set \@CLOUD_VISION_FEATURES_TEXT + 
open($gv_logs, ">:encoding(UTF-8)", $gv_logs_path); + $response = HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"foo": "bar"}'); + push @ua_responses, $response; + send_image_to_cloud_vision($image_path, $json_path, \@CLOUD_VISION_FEATURES_TEXT, $gv_logs); + close($gv_logs); + is(scalar @ua_requests, 1, "test request features text - One request issued to cloud vision"); + $issued_request = shift @ua_requests; + $request_json_body = decode_json($issued_request->content()); + compare_to_expected_results($request_json_body, "$expected_result_dir/request_body_3.json", + $update_expected_results); + $ocr_content = read_file($json_path); + $ocr_data = decode_json($ocr_content); + compare_to_expected_results($ocr_data, "$expected_result_dir/ocr_data_3.json", $update_expected_results); + $logs = read_file($gv_logs_path); + like($logs, qr/cloud vision success/, "test request features text - cloud vision success in logs"); + + # test with bad json path + open($gv_logs, ">:encoding(UTF-8)", $gv_logs_path); + $response = HTTP::Response->new("200", "OK", HTTP::Headers->new(), '{"foo": "blah"}'); + push @ua_responses, $response; + send_image_to_cloud_vision( + $image_path, + "/var/lib/not-a-directory/not-writable.json", + \@CLOUD_VISION_FEATURES_FULL, $gv_logs + ); + close($gv_logs); + is(scalar @ua_requests, 1, "non writable json - One request issued to cloud vision"); + $issued_request = shift @ua_requests; + # log issued + $logs = read_file($gv_logs_path); + like($logs, qr|Cannot write /var/lib/not-a-directory/not-writable.json|, "non writable json - error logged"); + unlike($logs, qr/cloud vision success/, "non writable json - no cloud vision success in logs"); + + # test bad request + open($gv_logs, ">:encoding(UTF-8)", $gv_logs_path); + $json_path = $tmp_dir . 
"/small-img2.json"; + $response = HTTP::Response->new("403", "Not authorized", HTTP::Headers->new(), '{"foo": "blah"}'); + push @ua_responses, $response; + send_image_to_cloud_vision($image_path, $json_path, \@CLOUD_VISION_FEATURES_FULL, $gv_logs); + close($gv_logs); + is(scalar @ua_requests, 1, "request error - one request issued to cloud vision"); + $issued_request = shift @ua_requests; + # log issued + $logs = read_file($gv_logs_path); + like($logs, qr|error\ttests/unit/inputs/small-img.jpg\t403\tNot authorized|, "request not successfull logged"); + unlike($logs, qr/cloud vision success/, "request error - no cloud vision success in logs"); + # no json path + ok(!(-e $json_path), "request error - json file not created"); + +} + +done_testing(); diff --git a/tests/update_tests_results.sh b/tests/update_tests_results.sh index dfe9a1acefc7d..b1e6b906d336b 100755 --- a/tests/update_tests_results.sh +++ b/tests/update_tests_results.sh @@ -28,6 +28,7 @@ perl nutriscore.t --update-expected-results perl packaging.t --update-expected-results perl recipes.t --update-expected-results perl import_convert_carrefour_france.t --update-expected-results +perl tests/unit/send_image_to_cloud_vision.t --update-expected-results cd ..