Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: separate products_obsolete MongoDB collection for obsolete products #8277

Merged
merged 9 commits into from
Apr 5, 2023
4 changes: 4 additions & 0 deletions cgi/product_multilingual.pl
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,11 @@ ($product_ref)

# Obsolete products

# We test if the "obsolete_since_date" field is present, as the checkbox field won't be sent if the box is unchecked
if (($User{moderator} or $Owner_id) and (defined single_param('obsolete_since_date'))) {
# We need to temporarily record if the product was obsolete, so that we can remove it
# from the product or product_obsolete collection if its obsolete status changed
$product_ref->{was_obsolete} = $product_ref->{obsolete};
$product_ref->{obsolete} = remove_tags_and_quote(decode utf8 => single_param("obsolete"));
$product_ref->{obsolete_since_date} = remove_tags_and_quote(decode utf8 => single_param("obsolete_since_date"));
}
Expand Down
2 changes: 1 addition & 1 deletion lib/ProductOpener/Config2_sample.pm
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ $data_root = "/home/off";

$geolite2_path = '/usr/local/share/GeoLite2-Country/GeoLite2-Country.mmdb';

$mongodb = "off";
$mongodb = "off"; # MongoDB database name
$mongodb_host = "mongodb://localhost";
$mongodb_timeout_ms = 50000; # config option max_time_ms/maxTimeMS

Expand Down
50 changes: 40 additions & 10 deletions lib/ProductOpener/Data.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ improve performance of aggregate queries for an improved user experience and mor
collection was initially proposed in L<issue#1610|https://github.com/openfoodfacts/openfoodfacts-server/issues/1610> on
GitHub, where some additional context is available.

Obsolete products that have been withdrawn from the market have separate collections: products_obsolete and products_obsolete_tags

=cut

package ProductOpener::Data;
Expand All @@ -53,7 +55,6 @@ BEGIN {
&get_database
&get_collection
&get_products_collection
&get_products_tags_collection
&get_emb_codes_collection
&get_recent_changes_collection
&remove_documents_by_ids
Expand Down Expand Up @@ -115,24 +116,57 @@ sub execute_query ($sub) {
)->run();
}

=head2 get_products_collection()
=head2 get_products_collection( $options_ref )

C<get_products_collection()> establishes a connection to MongoDB and uses timeout as an argument. This then selects a collection
from within the database.

=head3 Arguments

This method takes in arguments of integer type (user defined timeout in milliseconds).
It is optional for this subroutine to have an argument.
This method takes parameters in an optional hash reference with the following keys:

=head4 database MongoDB database name

Defaults to $ProductOpener::Config::mongodb

This is useful when moving products to another flavour
(e.g. from Open Food Facts (database: off) to Open Beauty Facts (database: obf))

=head4 timeout User defined timeout in milliseconds

=head4 obsolete

If set to a true value, the function returns a collection that contains only obsolete products,
otherwise it returns the collection with products that are not obsolete.

=head4 tags

If set to a true value, the function may return a smaller collection that contains only the *_tags fields,
in order to speed aggregate queries. The smaller collection is created every night,
and may therefore contain slightly stale data.

As of 2023/03/13, we return the products_tags collection for non obsolete products.
For obsolete products, we currently return the products_obsolete collection, but we might
create a separate products_obsolete_tags collection in the future, if it becomes necessary to create one.

=head3 Return values

Returns a mongoDB collection object.

=cut

sub get_products_collection ($timeout = undef) {
return get_collection($mongodb, 'products', $timeout);
sub get_products_collection ($options_ref = {}) {
my $database = $options_ref->{database} // $mongodb;
my $collection = 'products';
if ($options_ref->{obsolete}) {
$collection .= '_obsolete';
}
# We don't have a products_obsolete_tags collection at this point
# if it changes, the following elsif should be changed to a if
elsif ($options_ref->{tags}) {
$collection .= '_tags';
}
return get_collection($database, $collection, $options_ref->{timeout});
}

=head2 get_products_tags_collection()
Expand All @@ -150,10 +184,6 @@ Returns a mongoDB collection.

=cut

sub get_products_tags_collection ($timeout = undef) {
return get_collection($mongodb, 'products_tags', $timeout);
}

sub get_emb_codes_collection ($timeout = undef) {
return get_collection($mongodb, 'emb_codes', $timeout);
}
Expand Down
46 changes: 30 additions & 16 deletions lib/ProductOpener/Display.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,8 @@ sub query_list_of_tags ($request_ref, $query_ref) {
if $log->is_debug();
$results = execute_query(
sub {
return get_products_collection()->aggregate($aggregate_parameters, {allowDiskUse => 1});
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->aggregate($aggregate_parameters, {allowDiskUse => 1});
}
);
};
Expand All @@ -1457,7 +1458,8 @@ sub query_list_of_tags ($request_ref, $query_ref) {
if $log->is_debug();
$results = execute_query(
sub {
return get_products_tags_collection()->aggregate($aggregate_parameters, {allowDiskUse => 1});
return get_products_collection({obsolete => request_param($request_ref, "obsolete"), tags => 1})
->aggregate($aggregate_parameters, {allowDiskUse => 1});
}
);
};
Expand Down Expand Up @@ -1524,7 +1526,7 @@ sub query_list_of_tags ($request_ref, $query_ref) {
if $log->is_debug();
$count_results = execute_query(
sub {
return get_products_collection()
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->aggregate($aggregate_count_parameters, {allowDiskUse => 1});
}
);
Expand All @@ -1537,7 +1539,8 @@ sub query_list_of_tags ($request_ref, $query_ref) {
if $log->is_debug();
$count_results = execute_query(
sub {
return get_products_tags_collection()
return get_products_collection(
{obsolete => request_param($request_ref, "obsolete"), tags => 1})
->aggregate($aggregate_count_parameters, {allowDiskUse => 1});
}
);
Expand Down Expand Up @@ -4372,7 +4375,8 @@ sub count_products ($request_ref, $query_ref) {
$log->debug("Counting MongoDB documents for query", {query => $query_ref}) if $log->is_debug();
$count = execute_query(
sub {
return get_products_collection()->count_documents($query_ref);
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->count_documents($query_ref);
}
);
};
Expand Down Expand Up @@ -4891,7 +4895,8 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
$log->debug("Counting MongoDB documents for query", {query => $query_ref}) if $log->is_debug();
$count = execute_query(
sub {
return get_products_tags_collection()->count_documents($query_ref);
return get_products_collection({obsolete => request_param($request_ref, "obsolete"), tags => 1})
->count_documents($query_ref);
}
);
$log->info("MongoDB count query ok", {error => $@, count => $count}) if $log->is_info();
Expand All @@ -4901,7 +4906,8 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
$log->debug("Executing MongoDB query", {query => $aggregate_parameters}) if $log->is_debug();
$cursor = execute_query(
sub {
return get_products_tags_collection()->aggregate($aggregate_parameters, {allowDiskUse => 1});
return get_products_collection({obsolete => request_param($request_ref, "obsolete"), tags => 1})
->aggregate($aggregate_parameters, {allowDiskUse => 1});
}
);
}
Expand Down Expand Up @@ -4951,7 +4957,9 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
$log->debug("count_documents on smaller products_tags collection",
{key => $key_count})
if $log->is_debug();
return get_products_tags_collection()->count_documents($query_ref);
return get_products_collection(
{obsolete => request_param($request_ref, "obsolete"), tags => 1})
->count_documents($query_ref);
}
);

Expand All @@ -4962,7 +4970,9 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
sub {
$log->debug("count_documents on complete products collection", {key => $key_count})
if $log->is_debug();
return get_products_collection()->count_documents($query_ref);
return get_products_collection(
{obsolete => request_param($request_ref, "obsolete")})
->count_documents($query_ref);
}
);
}
Expand All @@ -4988,7 +4998,8 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
sub {
$log->debug("empty query_ref, use estimated_document_count fot better performance", {})
if $log->is_debug();
return get_products_collection()->estimated_document_count();
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->estimated_document_count();
}
);
}
Expand All @@ -4999,8 +5010,8 @@ sub search_and_display_products ($request_ref, $query_ref, $sort_by, $limit, $pa
if $log->is_debug();
$cursor = execute_query(
sub {
return get_products_collection()->query($query_ref)->fields($fields_ref)->sort($sort_ref)
->limit($limit)->skip($skip);
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->query($query_ref)->fields($fields_ref)->sort($sort_ref)->limit($limit)->skip($skip);
}
);
$log->info("MongoDB query ok", {error => $@}) if $log->is_info();
Expand Down Expand Up @@ -6361,7 +6372,8 @@ sub search_and_graph_products ($request_ref, $query_ref, $graph_ref) {
eval {
$cursor = execute_query(
sub {
return get_products_collection()->query($query_ref)->fields($fields_ref);
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->query($query_ref)->fields($fields_ref);
}
);
};
Expand Down Expand Up @@ -6493,7 +6505,8 @@ sub search_and_map_products ($request_ref, $query_ref, $graph_ref) {
eval {
$cursor = execute_query(
sub {
return get_products_collection()->query($query_ref)->fields(
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->query($query_ref)->fields(
{
code => 1,
lc => 1,
Expand All @@ -6505,7 +6518,7 @@ sub search_and_map_products ($request_ref, $query_ref, $graph_ref) {
origins => 1,
emb_codes_tags => 1,
}
);
);
}
);
};
Expand Down Expand Up @@ -10970,7 +10983,8 @@ sub search_and_analyze_recipes ($request_ref, $query_ref) {
eval {
$cursor = execute_query(
sub {
return get_products_collection()->query($query_ref)->fields($fields_ref);
return get_products_collection({obsolete => request_param($request_ref, "obsolete")})
->query($query_ref)->fields($fields_ref);
}
);
};
Expand Down
3 changes: 2 additions & 1 deletion lib/ProductOpener/Food.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1662,7 +1662,8 @@ sub create_nutrients_level_taxonomy() {
my ($nid, $low, $high) = @{$nutrient_level_ref};
foreach my $level ('low', 'moderate', 'high') {
$nutrient_levels_taxonomy
.= "\n" . 'en:'
.= "\n"
. 'en:'
. sprintf(
$Lang{nutrient_in_quantity}{en},
display_taxonomy_tag("en", "nutrients", "zz:$nid"),
Expand Down
64 changes: 57 additions & 7 deletions lib/ProductOpener/Products.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1001,14 +1001,32 @@ sub compute_sort_keys ($product_ref) {
return;
}

=head2 store_product ($user_id, $product_ref, $comment)

Save changes of a product:
- in a new .sto file on the disk
- in MongoDB (in the products collection, or products_obsolete collection if the product is obsolete)

Before saving, some field values are computed, and product history and completeness is computed.

=cut

sub store_product ($user_id, $product_ref, $comment) {

my $code = $product_ref->{code};
my $product_id = $product_ref->{_id};
my $path = product_path($product_ref);
my $rev = $product_ref->{rev};

$log->debug("store_product - start", {code => $code, product_id => $product_id}) if $log->is_debug();
$log->debug(
"store_product - start",
{
code => $code,
product_id => $product_id,
obsolete => $product_ref->{obsolete},
was_obsolete => $product_ref->{was_obsolete}
}
) if $log->is_debug();

# In case we need to move a product from OFF to OBF etc.
# the "new_server" value will be set to off, obf etc.
Expand All @@ -1021,8 +1039,30 @@ sub store_product ($user_id, $product_ref, $comment) {
my $new_data_root = $data_root;
my $new_www_root = $www_root;

my $products_collection = get_products_collection();
my $new_products_collection = $products_collection;
# We use the was_obsolete flag so that we can remove the product from its old collection
# (either products or products_obsolete) if its obsolete status has changed
my $previous_products_collection = get_products_collection({obsolete => $product_ref->{was_obsolete}});
my $new_products_collection = get_products_collection({obsolete => $product_ref->{obsolete}});
my $delete_from_previous_products_collection = 0;

# the obsolete (and was_obsolete) field is either undef or an empty string, or contains "on"
if ( ($product_ref->{was_obsolete} and not $product_ref->{obsolete})
or (not $product_ref->{was_obsolete} and $product_ref->{obsolete}))
{
# The obsolete status changed, we need to remove the product from its previous collection
$log->debug(
"obsolete status changed",
{
code => $code,
product_id => $product_id,
obsolete => $product_ref->{obsolete},
was_obsolete => $product_ref->{was_obsolete},
previous_products_collection => $previous_products_collection
}
) if $log->is_debug();
$delete_from_previous_products_collection = 1;
}
delete $product_ref->{was_obsolete};

if ( (defined $product_ref->{server})
and (defined $options{other_servers})
Expand All @@ -1031,7 +1071,8 @@ sub store_product ($user_id, $product_ref, $comment) {
my $server = $product_ref->{server};
$new_data_root = $options{other_servers}{$server}{data_root};
$new_www_root = $options{other_servers}{$server}{www_root};
$new_products_collection = get_collection($options{other_servers}{$server}{mongodb}, 'products');
$new_products_collection = get_products_collection(
{database => $options{other_servers}{$server}{mongodb}, obsolete => $product_ref->{obsolete}});
}

if (defined $product_ref->{old_code}) {
Expand All @@ -1043,7 +1084,8 @@ sub store_product ($user_id, $product_ref, $comment) {
my $new_server = $product_ref->{new_server};
$new_data_root = $options{other_servers}{$new_server}{data_root};
$new_www_root = $options{other_servers}{$new_server}{www_root};
$new_products_collection = get_collection($options{other_servers}{$new_server}{mongodb}, 'products');
$new_products_collection = get_products_collection(
{database => $options{other_servers}{$new_server}{mongodb}, obsolete => $product_ref->{obsolete}});
$product_ref->{server} = $product_ref->{new_server};
delete $product_ref->{new_server};
}
Expand Down Expand Up @@ -1113,7 +1155,7 @@ sub store_product ($user_id, $product_ref, $comment) {

execute_query(
sub {
return $products_collection->delete_one({"_id" => $product_ref->{_id}});
return $previous_products_collection->delete_one({"_id" => $product_ref->{_id}});
}
);

Expand Down Expand Up @@ -1262,14 +1304,22 @@ sub store_product ($user_id, $product_ref, $comment) {
#return 0;
}

# First store the product data in a .sto file on disk
store("$new_data_root/products/$path/$rev.sto", $product_ref);

# Also store the product in MongoDB, unless it was marked as deleted
if ($product_ref->{deleted}) {
$new_products_collection->delete_one({"_id" => $product_ref->{_id}});
}
else {
$new_products_collection->replace_one({"_id" => $product_ref->{_id}}, $product_ref, {upsert => 1});
}

store("$new_data_root/products/$path/$rev.sto", $product_ref);
# product that has a changed obsolete status
if ($delete_from_previous_products_collection) {
$previous_products_collection->delete_one({"_id" => $product_ref->{_id}});
}

# Update link
my $link = "$new_data_root/products/$path/product.sto";
if (-l $link) {
Expand Down
Loading