Skip to content

Commit 43c74d6

Browse files
feat: Taxonomy suggestions API v3 for packaging shapes and materials (#8008)
1 parent 5dce3c0 commit 43c74d6

File tree

63 files changed

+1904
-274
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1904
-274
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ RUN \
190190
done && \
191191
chown www-data:www-data -R /mnt/podata && \
192192
# Create symlinks of data files that are indeed conf data in /mnt/podata (because we currently mix data and conf data)
193-
for path in ecoscore emb_codes forest-footprint ingredients packager-codes po taxonomies templates; do \
193+
for path in data-default ecoscore emb_codes forest-footprint ingredients packager-codes po taxonomies templates; do \
194194
ln -sf /opt/product-opener/${path} /mnt/podata/${path}; \
195195
done && \
196196
# Create some necessary files to ensure permissions in volumes

cgi/product_multilingual.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ ($product_ref, $field, $language)
684684
if (defined $tags_fields{$fieldtype}) {
685685
$class = "tagify-me";
686686
if ((defined $taxonomy_fields{$fieldtype}) or ($fieldtype eq 'emb_codes')) {
687-
$autocomplete = "$formatted_subdomain/cgi/suggest.pl?tagtype=$fieldtype&";
687+
$autocomplete = "$formatted_subdomain/api/v3/taxonomy_suggestions?tagtype=$fieldtype";
688688
}
689689
}
690690

cgi/suggest.pl

100755100644
Lines changed: 26 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -26,178 +26,46 @@
2626
use CGI qw/:cgi :form escapeHTML/;
2727

2828
use ProductOpener::Config qw/:all/;
29-
use ProductOpener::Store qw/:all/;
30-
use ProductOpener::Index qw/:all/;
3129
use ProductOpener::Display qw/:all/;
32-
use ProductOpener::Users qw/:all/;
33-
use ProductOpener::Products qw/:all/;
34-
use ProductOpener::Food qw/:all/;
35-
use ProductOpener::Tags qw/:all/;
30+
use ProductOpener::TaxonomySuggestions qw/:all/;
3631
use ProductOpener::Lang qw/:all/;
37-
use ProductOpener::PackagerCodes qw/:all/;
3832
use ProductOpener::HTTP qw/:all/;
3933

4034
use CGI qw/:cgi :form escapeHTML/;
4135
use URI::Escape::XS;
4236
use Storable qw/dclone/;
43-
use Encode;
4437
use JSON::PP;
45-
use List::Util qw/min/;
38+
use Encode;
4639

4740
my $request_ref = ProductOpener::Display::init_request();
4841

49-
=head1 CGI script to auto-complete entries for tags
50-
51-
=head2 Request parameters
52-
53-
=head3 tagtype - the type of tag
54-
55-
=head3 string - string to search
56-
57-
=head3 term - term to search
58-
59-
If string and term are passed together, they are concatenated together as separate words
60-
61-
=head3 limit - max number of suggestions
62-
63-
Warning, we are currently doing a brute force search, so avoid setting it too high
64-
65-
=cut
66-
67-
my $tagtype = single_param('tagtype');
68-
my $string = decode utf8 => single_param('string');
69-
# searched term
70-
my $term = decode utf8 => single_param('term');
71-
72-
# search language code
73-
my $search_lc = $lc;
74-
# superseed by request parameter
75-
if (defined single_param('lc')) {
76-
$search_lc = single_param('lc');
77-
}
78-
79-
my $original_lc = $search_lc;
80-
81-
# if search begins with a language code, use it for search
82-
if ($term =~ /^(\w\w):/) {
83-
$search_lc = $1;
84-
$term = $';
85-
}
86-
87-
# max results
88-
my $limit = 25;
89-
# superseed by request parameter
90-
if (defined single_param('limit')) {
91-
# we put a hard limit however
92-
$limit = min(int(single_param('limit')), 400);
93-
}
94-
95-
my @suggestions = (); # Suggestions starting with the term
96-
my @suggestions_c = (); # Suggestions containing the term
97-
my @suggestions_f = (); # fuzzy suggestions
98-
99-
my $cache_max_age = 0;
100-
my $suggestions_count = 0;
101-
102-
# search for emb codes
103-
if ($tagtype eq 'emb_codes') {
104-
my $stringid = get_string_id_for_lang("no_language", normalize_packager_codes($term));
105-
my @tags = sort keys %packager_codes;
106-
foreach my $canon_tagid (@tags) {
107-
next if $canon_tagid !~ /^$stringid/;
108-
push @suggestions, normalize_packager_codes($canon_tagid);
109-
last if ++$suggestions_count >= $limit;
110-
}
111-
# add cache to request
112-
$cache_max_age = 3600;
113-
}
114-
else {
115-
# search for term in a taxonomy
116-
117-
# normalize string and term
118-
my $stringid = get_string_id_for_lang($search_lc, $string) . "-" . get_string_id_for_lang($search_lc, $term);
119-
# remove eventual leading or ending "-"
120-
$stringid =~ s/^-//;
121-
$stringid =~ s/^-$//;
122-
# fuzzy match whole words with eventual inter-words
123-
my $fuzzystringid = join(".*", split("-", $stringid));
124-
# all tags can be retrieve from the $translations_to hash
125-
my @tags = sort keys %{$translations_to{$tagtype}};
126-
foreach my $canon_tagid (@tags) {
127-
# just_synonyms are not real entries
128-
next if defined $just_synonyms{$tagtype}{$canon_tagid};
129-
130-
my $tag; # this is the content string
131-
my $tagid; # this is the tag
132-
133-
# search if the tag exists in target language
134-
if (defined $translations_to{$tagtype}{$canon_tagid}{$search_lc}) {
135-
136-
$tag = $translations_to{$tagtype}{$canon_tagid}{$search_lc};
137-
# TODO: explain why $tagid can be different from $canon_tagid
138-
$tagid = get_string_id_for_lang($search_lc, $tag);
139-
140-
# add language prefix if we are not searching current interface language
141-
if (not($search_lc eq $original_lc)) {
142-
$tag = $search_lc . ":" . $tag;
143-
}
144-
}
145-
# also search for special language code "xx" which is universal
146-
elsif (defined $translations_to{$tagtype}{$canon_tagid}{xx}) {
147-
$tag = $translations_to{$tagtype}{$canon_tagid}{xx};
148-
$tagid = get_string_id_for_lang("xx", $tag);
149-
}
150-
151-
if (defined $tag) {
152-
# matching at start, best matches
153-
if ($tagid =~ /^$stringid/) {
154-
push @suggestions, $tag;
155-
# only matches at start are considered
156-
$suggestions_count++;
157-
}
158-
# matching inside
159-
elsif ($tagid =~ /$stringid/) {
160-
push @suggestions_c, $tag;
161-
}
162-
# fuzzy match
163-
elsif ($tagid =~ /$fuzzystringid/) {
164-
push @suggestions_f, $tag;
165-
}
166-
# end as soon as we got enough
167-
last if $suggestions_count >= $limit;
168-
}
169-
}
170-
# add cache to request
171-
$cache_max_age = 3600;
172-
}
173-
# sort best suggestions
174-
@suggestions = sort @suggestions;
175-
# suggestions containing term
176-
my $contains_to_add = min($limit - (scalar @suggestions), scalar @suggestions_c) - 1;
177-
if ($contains_to_add >= 0) {
178-
push @suggestions, @suggestions_c[0 .. $contains_to_add];
179-
}
180-
# Suggestions as fuzzy match
181-
my $fuzzy_to_add = min($limit - (scalar @suggestions), scalar @suggestions_f) - 1;
182-
if ($fuzzy_to_add >= 0) {
183-
push @suggestions, @suggestions_f[0 .. $fuzzy_to_add];
184-
}
42+
my $search_lc = $request_ref->{lc};
43+
44+
# We need a taxonomy name to provide suggestions for
45+
my $tagtype = request_param($request_ref, "tagtype");
46+
47+
# The API accepts a string input in the "string" field or "term" field.
48+
# - term is used by the jquery Autocomplete widget: https://api.jqueryui.com/autocomplete/
49+
# Use "string" only if both are present.
50+
my $string = decode("utf8", (request_param($request_ref, 'string') || request_param($request_ref, 'term')));
51+
52+
# /cgi/suggest.pl supports only limited context (use /api/v3/taxonomy_suggestions to use richer context)
53+
my $context_ref = {country => $request_ref->{country},};
54+
55+
# Options define how many suggestions should be returned, in which format etc.
56+
my $options_ref = {limit => request_param($request_ref, 'limit')};
57+
58+
my @suggestions = get_taxonomy_suggestions($tagtype, $search_lc, $string, $context_ref, $options_ref);
59+
18560
my $data = encode_json(\@suggestions);
18661

18762
# send response
18863
write_cors_headers();
18964

190-
if ($cache_max_age) {
191-
print header(
192-
-type => 'application/json',
193-
-charset => 'utf-8',
194-
-cache_control => 'public, max-age=' . $cache_max_age,
195-
);
196-
}
197-
else {
198-
print header(
199-
-type => 'application/json',
200-
-charset => 'utf-8',
201-
);
202-
}
65+
print header(
66+
-type => 'application/json',
67+
-charset => 'utf-8',
68+
-cache_control => 'public, max-age=' . 60, # 1 minute cache
69+
);
70+
20371
print $data;

data-default/categories_stats/categories_packagings_stats.all.popular.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

docs/reference/api-v3.yml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,83 @@ paths:
225225
- an object sent in the packagings field will replace any pre-existing data.
226226
- an object sent in the field suffixed with _add (e.g. packagings_add) will be merged with any pre-existing data.
227227
parameters: []
228+
/api/v3/taxonomy_suggestions:
229+
parameters: []
230+
get:
231+
summary: Get taxonomy entries suggestions
232+
tags: []
233+
responses:
234+
'200':
235+
description: OK
236+
content:
237+
application/json:
238+
schema:
239+
allOf:
240+
- $ref: ./responses/response-status/response_status.yaml
241+
- type: object
242+
properties:
243+
suggestions:
244+
type: array
245+
description: Sorted array of suggestion strings, in the language requested in the "lc" field.
246+
items:
247+
type: string
248+
operationId: get-api-v3-taxonomy_suggestions-taxonomy
249+
description: |-
250+
Open Food Facts uses multilingual [taxonomies](https://wiki.openfoodfacts.org/Global_taxonomies) to normalize entries for categories, labels, ingredients, packaging shapes / materials / recycling instructions and many more fields.
251+
252+
This API returns taxonomy entries suggestions that can be used in product edit forms, search forms etc. (for instance in autocomplete dropdowns using libraries like Tagify or select2 on the Web).
253+
254+
Suggestions filtering:
255+
256+
The string parameter restricts the results to suggestions that contain a specific string (useful for autocomplete suggestions).
257+
258+
Suggestions ordering:
259+
260+
- For packaging shapes and materials, suggestions are ordered first by the number of packaging components they appear in (restricted by country, categories and shape (for materials) if they are passed as parameters).
261+
- For all other taxonomies, results are ordered alphabetically.
262+
263+
If a string is passed, an additional sort is done so that suggestions that start with the string come first, followed by suggestions with a word that starts with the string, and then suggestions that contain the string anywhere.
264+
parameters:
265+
- $ref: ./api.yml#/components/parameters/tagtype
266+
- schema:
267+
type: string
268+
example: en
269+
in: query
270+
name: lc
271+
description: 2 letter code of the language used for suggestions and for matching the input string
272+
- schema:
273+
type: string
274+
example: pe
275+
in: query
276+
name: string
277+
description: 'Optional string used to filter suggestions (useful for autocomplete). If passed, suggestions starting with the string will be returned first, followed by suggestions matching the string at the beginning of a word, and suggestions matching the string inside a word.'
278+
- schema:
279+
type: string
280+
in: query
281+
name: cc
282+
description: '2 letter country code, used to return popular packaging shapes and materials for products sold in the country'
283+
- schema:
284+
type: string
285+
example: yogurts
286+
in: query
287+
name: categories
288+
description: 'Comma separated list of category tags (e.g. "en:fats,en:unsalted-butters") or category names in the language indicated by the "lc" field (e.g. "graisses, beurres salés" in French)'
289+
- schema:
290+
type: string
291+
example: bottle
292+
in: query
293+
name: shape
294+
description: 'Shape of packaging component (tag identified in the packaging_shapes taxonomy, or plain text tag name in the language indicated by the "lc" field)'
295+
- schema:
296+
type: string
297+
in: query
298+
name: limit
299+
description: 'Maximum number of suggestions. Default is 25, max is 400.'
300+
- schema:
301+
type: string
302+
in: query
303+
name: term
304+
description: Alias for the "string" parameter provided for backward compatibility. "string" takes precedence.
228305
components:
229306
schemas: null
230307
parameters: null

docs/reference/schemas/packagings/packagings-write.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,3 @@ description: |-
99
examples: []
1010
items:
1111
$ref: ./packaging_component-write.yaml
12-
readOnly: true
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
title: tagtype
2+
x-stoplight:
3+
id: cyaecslbj8x2i
4+
type: string
5+
enum:
6+
- additives_classes
7+
- additives
8+
- allergens
9+
- amino_acids
10+
- categories
11+
- countries
12+
- data_quality
13+
- data_quality
14+
- data_quality
15+
- data_quality
16+
- data_quality
17+
- data_quality
18+
- data_quality
19+
- food_groups
20+
- improvements
21+
- ingredients_analysis
22+
- ingredients_processing
23+
- ingredients
24+
- labels
25+
- languages
26+
- minerals
27+
- misc
28+
- nova_groups
29+
- nucleotides
30+
- nutrient_levels
31+
- nutrients
32+
- origins
33+
- other_nutritional_substances
34+
- packaging_materials
35+
- packaging_recycling
36+
- packaging
37+
- packaging_shapes
38+
- periods_after_opening
39+
- preservation
40+
- states
41+
- test
42+
- allergens
43+
- vitamins
44+
description: 'Identifier of a taxonomy. See https://wiki.openfoodfacts.org/Global_taxonomies and https://github.com/openfoodfacts/openfoodfacts-server/tree/main/taxonomies'
45+
examples: []

html/js/product-multilingual.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ function initializeTagifyInput(el) {
546546
let abortController;
547547
input.on("input", function(event) {
548548
const value = event.detail.value;
549-
input.settings.whitelist = []; // reset the whitelist
549+
input.whitelist = null; // reset the whitelist
550550

551551
if (el.dataset.autocomplete && el.dataset.autocomplete !== "") {
552552
// https://developer.mozilla.org/en-US/docs/Web/API/AbortController/abort
@@ -556,13 +556,13 @@ function initializeTagifyInput(el) {
556556

557557
abortController = new AbortController();
558558

559-
fetch(el.dataset.autocomplete + "term=" + value, {
559+
fetch(el.dataset.autocomplete + "&string=" + value, {
560560
signal: abortController.signal
561561
}).
562562
then((RES) => RES.json()).
563-
then(function(whitelist) {
564-
input.settings.whitelist = whitelist;
565-
input.dropdown.show.call(input, value); // render the suggestions dropdown
563+
then(function(json) {
564+
input.whitelist = json.suggestions;
565+
input.dropdown.show(value); // render the suggestions dropdown
566566
});
567567
}
568568
});

0 commit comments

Comments
 (0)