From d97c6d74678e7eb84bed3fa3a2e5901ccc13ff9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Tue, 3 Jan 2023 17:53:56 +0100 Subject: [PATCH 01/13] feat: generate packagings stats --- scripts/gen_packaging_stats.pl | 152 +++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100755 scripts/gen_packaging_stats.pl diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl new file mode 100755 index 0000000000000..a88977103be14 --- /dev/null +++ b/scripts/gen_packaging_stats.pl @@ -0,0 +1,152 @@ +#!/usr/bin/perl -w + +# This file is part of Product Opener. +# +# Product Opener +# Copyright (C) 2011-2023 Association Open Food Facts +# Contact: contact@openfoodfacts.org +# Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France +# +# Product Opener is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +=head1 NAME + +gen_packaging_stats.pl - Generates aggregated data about the packaging components of products for a specific category in a specific country + +=head1 DESCRIPTION + +Aggregation counts are stored in a structure of the form: + +{ + countries => { + "en:world" => .. + "en:france" => { + categories => { + "all" => .. # stats for all categories + "en:yogourts" => { + shapes => { + "en:unknown" => .. + "all" => .. # stats for all shapes + "en:bottle" => { + materials_inherited => .. # stats for inherited (parents) materials (e.g. plastic for PET) + materials => { + "all" => .. + "en:plastic" => 12, # number of products sold in France that are yogurts and that have a plastic bottle packaging component + } + }, + .. + } + }, + .. + } + }, + .. + } +} + +=cut + +use Modern::Perl '2017'; +use utf8; + +use ProductOpener::Config qw/:all/; +use ProductOpener::Store qw/:all/; +use ProductOpener::Tags qw/:all/; +use ProductOpener::Products qw/:all/; +use ProductOpener::Lang qw/:all/; +use ProductOpener::Data qw/:all/; + +use File::Path qw(mkpath); +use JSON::PP; +use Data::DeepAccess qw(deep_exists deep_get deep_set deep_val); + +# Output will be in the $data_root/data/categories_stats directory + +(-e "$data_root/data") + or mkdir("$data_root/data", oct(755)) + or die("Could not create target directory $data_root/data : $!\n"); +(-e "$data_root/data/categories_stats") + or mkdir("$data_root/data/categories_stats", oct(755)) + or die("Could not create target directory $data_root/data/categories_stats : $!\n"); + +my $query_ref = {'empty' => {"\$ne" => 1}, 'obsolete' => {"\$ne" => 1}}; + +$query_ref->{misc_tags} = 'en:packagings-with-weights'; + +my $fields_ref = { + countries_tags => 1, + categories_tags => 1, + packagings => 1, +}; + +# 300 000 ms timeout so that we can export the whole database +# 5mins is not enough, 50k docs were exported +my $cursor = get_products_collection(3 * 60 * 60 * 1000)->query($query_ref) + ->sort({created_t => 1})->fields($fields_ref); + +$cursor->immortal(1); + +my $total = 0; + +my $packagings_stats_ref = {}; + +# Go through all products +while (my $product_ref = $cursor->next) { + $total++; + + # Generate stats for all countries + en:world (products from all countries) + if (not defined $product_ref->{countries_tags}) { + $product_ref->{countries_tags} = []; + } + push @{$product_ref->{countries_tags}}, "en:world"; + + foreach my $country (@{$product_ref->{countries_tags}}) { + + # Generate stats for all categories + all (products from all categories) + if (not defined $product_ref->{categories_tags}) { + $product_ref->{categories_tags} = []; + } + push @{$product_ref->{categories_tags}}, "all"; + + foreach my $category (@{$product_ref->{categories_tags}}) { + + # Go through all packaging components + if (not defined $product_ref->{packagings}) { + $product_ref->{packagings} = []; + } + + foreach my $packaging_ref (@{$product_ref->{packagings}}) { + my $shape = $packaging_ref->{shape} || "en:unknown"; + my $material = $packaging_ref->{material} || "en:unknown"; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", $material )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all" )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", "all" )) += 1; + } + } + + } + +} + +store("$data_root/data/categories_stats/categories_packagings_stats.sto", $packagings_stats_ref); + +binmode STDOUT, ":encoding(UTF-8)"; +if (open(my $JSON, ">", "$www_root/data/categories_packagings_stats.json")) { + print $JSON encode_json($packagings_stats_ref); + close($JSON); +} + +exit(0); + From 3b6f4caa86719e65a3fd13217b3ed717f9e5bdf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Tue, 3 Jan 2023 19:12:42 +0100 Subject: [PATCH 02/13] add stats for material parents --- scripts/gen_packaging_stats.pl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index a88977103be14..9b6f36dc7d78e 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -39,7 +39,7 @@ =head1 DESCRIPTION "en:unknown" => .. "all" => .. # stats for all shapes "en:bottle" => { - materials_inherited => .. # stats for inherited (parents) materials (e.g. plastic for PET) + materials_parents => .. # stats for parents materials (e.g. PET will also count for plastic) materials => { "all" => .. "en:plastic" => 12, # number of products sold in France that are yogurts and that have a plastic bottle packaging component @@ -129,10 +129,20 @@ =head1 DESCRIPTION foreach my $packaging_ref (@{$product_ref->{packagings}}) { my $shape = $packaging_ref->{shape} || "en:unknown"; my $material = $packaging_ref->{material} || "en:unknown"; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material )) += 1; deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", $material )) += 1; deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all" )) += 1; deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", "all" )) += 1; + + my @shape_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_shapes", $shape); + my @material_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_materials", $material); + + # Also add stats to parent materials + foreach my $material_parent (@material_parents, "all") { + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials_parents", $material_parent )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials_parents", $material_parent )) += 1; + } } } From 30d718404d4d50079012d9be79501dfa8b2c8534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Thu, 5 Jan 2023 15:33:10 +0100 Subject: [PATCH 03/13] compute stats for all products, and products with weights --- scripts/gen_packaging_stats.pl | 151 ++++++++++++++++++++------------- 1 file changed, 91 insertions(+), 60 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index 9b6f36dc7d78e..5351c6d2be227 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -57,8 +57,7 @@ =head1 DESCRIPTION =cut -use Modern::Perl '2017'; -use utf8; +use ProductOpener::PerlStandards; use ProductOpener::Config qw/:all/; use ProductOpener::Store qw/:all/; @@ -71,92 +70,124 @@ =head1 DESCRIPTION use JSON::PP; use Data::DeepAccess qw(deep_exists deep_get deep_set deep_val); -# Output will be in the $data_root/data/categories_stats directory -(-e "$data_root/data") - or mkdir("$data_root/data", oct(755)) - or die("Could not create target directory $data_root/data : $!\n"); -(-e "$data_root/data/categories_stats") - or mkdir("$data_root/data/categories_stats", oct(755)) - or die("Could not create target directory $data_root/data/categories_stats : $!\n"); +=head2 generate_packaging_stats_for_query($name, $query_ref) -my $query_ref = {'empty' => {"\$ne" => 1}, 'obsolete' => {"\$ne" => 1}}; +Generate packaging stats for products matching a specific query. -$query_ref->{misc_tags} = 'en:packagings-with-weights'; +Stats are saved in .sto format in $data_root/data/categories_stats/ +and in JSON format in $www_root/data/categories_stats/ -my $fields_ref = { - countries_tags => 1, - categories_tags => 1, - packagings => 1, -}; +=head3 Arguments -# 300 000 ms timeout so that we can export the whole database -# 5mins is not enough, 50k docs were exported -my $cursor = get_products_collection(3 * 60 * 60 * 1000)->query($query_ref) - ->sort({created_t => 1})->fields($fields_ref); +=head4 name $name -$cursor->immortal(1); -my $total = 0; -my $packagings_stats_ref = {}; +=head4 query reference $query_ref -# Go through all products -while (my $product_ref = $cursor->next) { - $total++; +=cut - # Generate stats for all countries + en:world (products from all countries) - if (not defined $product_ref->{countries_tags}) { - $product_ref->{countries_tags} = []; - } - push @{$product_ref->{countries_tags}}, "en:world"; +sub generate_packaging_stats_for_query($name, $query_ref) { + + $query_ref->{'empty'} = {"\$ne" => 1}; + $query_ref->{'obsolete'} = {"\$ne" => 1}; + + my $fields_ref = { + countries_tags => 1, + categories_tags => 1, + packagings => 1, + }; + + # 300 000 ms timeout so that we can export the whole database + # 5mins is not enough, 50k docs were exported + my $cursor = get_products_collection(3 * 60 * 60 * 1000)->query($query_ref) + ->sort({created_t => 1})->fields($fields_ref); - foreach my $country (@{$product_ref->{countries_tags}}) { + $cursor->immortal(1); - # Generate stats for all categories + all (products from all categories) - if (not defined $product_ref->{categories_tags}) { - $product_ref->{categories_tags} = []; + my $total = 0; + + my $packagings_stats_ref = {}; + + # Go through all products + while (my $product_ref = $cursor->next) { + $total++; + + # Generate stats for all countries + en:world (products from all countries) + if (not defined $product_ref->{countries_tags}) { + $product_ref->{countries_tags} = []; } - push @{$product_ref->{categories_tags}}, "all"; + push @{$product_ref->{countries_tags}}, "en:world"; - foreach my $category (@{$product_ref->{categories_tags}}) { + foreach my $country (@{$product_ref->{countries_tags}}) { - # Go through all packaging components - if (not defined $product_ref->{packagings}) { - $product_ref->{packagings} = []; + # Generate stats for all categories + all (products from all categories) + if (not defined $product_ref->{categories_tags}) { + $product_ref->{categories_tags} = []; } + push @{$product_ref->{categories_tags}}, "all"; + + foreach my $category (@{$product_ref->{categories_tags}}) { + + # Go through all packaging components + if (not defined $product_ref->{packagings}) { + $product_ref->{packagings} = []; + } - foreach my $packaging_ref (@{$product_ref->{packagings}}) { - my $shape = $packaging_ref->{shape} || "en:unknown"; - my $material = $packaging_ref->{material} || "en:unknown"; + foreach my $packaging_ref (@{$product_ref->{packagings}}) { + my $shape = $packaging_ref->{shape} || "en:unknown"; + my $material = $packaging_ref->{material} || "en:unknown"; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", $material )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all" )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", "all" )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", $material )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all" )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", "all" )) += 1; - my @shape_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_shapes", $shape); - my @material_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_materials", $material); + my @shape_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_shapes", $shape); + my @material_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_materials", $material); - # Also add stats to parent materials - foreach my $material_parent (@material_parents, "all") { - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials_parents", $material_parent )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials_parents", $material_parent )) += 1; - } + # Also add stats to parent materials + foreach my $material_parent (@material_parents, "all") { + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials_parents", $material_parent )) += 1; + deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials_parents", $material_parent )) += 1; + } + } } + } } -} + # Create directories for the output if they do not exist yet + + (-e "$data_root/data") + or mkdir("$data_root/data", oct(755)) + or die("Could not create target directory $data_root/data : $!\n"); + (-e "$data_root/data/categories_stats") + or mkdir("$data_root/data/categories_stats", oct(755)) + or die("Could not create target directory $data_root/data/categories_stats : $!\n"); + (-e "$www_root/data/categories_stats") + or mkdir("$www_root/data/categories_stats", oct(755)) + or die("Could not create target directory $www_root/data/categories_stats : $!\n"); + + # Perl structure in .sto format + + store("$data_root/data/categories_stats/categories_packagings_stats.$name.sto", $packagings_stats_ref); -store("$data_root/data/categories_stats/categories_packagings_stats.sto", $packagings_stats_ref); + # JSON + + binmode STDOUT, ":encoding(UTF-8)"; + if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.$name.json")) { + print $JSON encode_json($packagings_stats_ref); + close($JSON); + } -binmode STDOUT, ":encoding(UTF-8)"; -if (open(my $JSON, ">", "$www_root/data/categories_packagings_stats.json")) { - print $JSON encode_json($packagings_stats_ref); - close($JSON); } + +generate_packaging_stats_for_query("all", {}); +generate_packaging_stats_for_query("packagings-with-weights", {misc_tags => 'en:packagings-with-weights'}); + exit(0); From 3280bd445e5c4d39bd426d6b5488c9eaed1466f7 Mon Sep 17 00:00:00 2001 From: off Date: Thu, 5 Jan 2023 18:14:02 +0100 Subject: [PATCH 04/13] display progress --- scripts/gen_packaging_stats.pl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index 5351c6d2be227..b7cde95fc905a 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -99,9 +99,14 @@ ($name, $query_ref) packagings => 1, }; - # 300 000 ms timeout so that we can export the whole database - # 5mins is not enough, 50k docs were exported - my $cursor = get_products_collection(3 * 60 * 60 * 1000)->query($query_ref) + my $socket_timeout_ms = 3 * 60 * 60 * 60000; # 3 hours + my $products_collection = get_products_collection($socket_timeout_ms); + + my $products_count = $products_collection->count_documents($query_ref); + + print STDERR "$name: $products_count products\n"; + + my $cursor = $products_collection->query($query_ref) ->sort({created_t => 1})->fields($fields_ref); $cursor->immortal(1); @@ -114,6 +119,10 @@ ($name, $query_ref) while (my $product_ref = $cursor->next) { $total++; + if ($total % 1000 == 0) { + print STDERR "$name: $total / $products_count processed\n"; + } + # Generate stats for all countries + en:world (products from all countries) if (not defined $product_ref->{countries_tags}) { $product_ref->{countries_tags} = []; @@ -186,8 +195,8 @@ ($name, $query_ref) } -generate_packaging_stats_for_query("all", {}); generate_packaging_stats_for_query("packagings-with-weights", {misc_tags => 'en:packagings-with-weights'}); +generate_packaging_stats_for_query("all", {}); exit(0); From 6e739412cdc3f5a48a2e9bbbced1590f6840acbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 11:16:16 +0100 Subject: [PATCH 05/13] lint --- scripts/gen_packaging_stats.pl | 211 ++++++++++++++++++--------------- 1 file changed, 114 insertions(+), 97 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index b7cde95fc905a..b9f783b1989b5 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -70,7 +70,6 @@ =head1 DESCRIPTION use JSON::PP; use Data::DeepAccess qw(deep_exists deep_get deep_set deep_val); - =head2 generate_packaging_stats_for_query($name, $query_ref) Generate packaging stats for products matching a specific query. @@ -88,113 +87,131 @@ =head4 query reference $query_ref =cut -sub generate_packaging_stats_for_query($name, $query_ref) { - - $query_ref->{'empty'} = {"\$ne" => 1}; - $query_ref->{'obsolete'} = {"\$ne" => 1}; - - my $fields_ref = { - countries_tags => 1, - categories_tags => 1, - packagings => 1, - }; - - my $socket_timeout_ms = 3 * 60 * 60 * 60000; # 3 hours - my $products_collection = get_products_collection($socket_timeout_ms); - - my $products_count = $products_collection->count_documents($query_ref); - - print STDERR "$name: $products_count products\n"; - - my $cursor = $products_collection->query($query_ref) - ->sort({created_t => 1})->fields($fields_ref); +sub generate_packaging_stats_for_query ($name, $query_ref) { + + $query_ref->{'empty'} = {"\$ne" => 1}; + $query_ref->{'obsolete'} = {"\$ne" => 1}; + + my $fields_ref = { + countries_tags => 1, + categories_tags => 1, + packagings => 1, + }; + + my $socket_timeout_ms = 3 * 60 * 60 * 60000; # 3 hours + my $products_collection = get_products_collection($socket_timeout_ms); + + my $products_count = $products_collection->count_documents($query_ref); + + print STDERR "$name: $products_count products\n"; + + my $cursor = $products_collection->query($query_ref)->sort({created_t => 1})->fields($fields_ref); + + $cursor->immortal(1); + + my $total = 0; + + my $packagings_stats_ref = {}; + + # Go through all products + while (my $product_ref = $cursor->next) { + $total++; + + if ($total % 1000 == 0) { + print STDERR "$name: $total / $products_count processed\n"; + } + + # Generate stats for all countries + en:world (products from all countries) + if (not defined $product_ref->{countries_tags}) { + $product_ref->{countries_tags} = []; + } + push @{$product_ref->{countries_tags}}, "en:world"; + + foreach my $country (@{$product_ref->{countries_tags}}) { + + # Generate stats for all categories + all (products from all categories) + if (not defined $product_ref->{categories_tags}) { + $product_ref->{categories_tags} = []; + } + push @{$product_ref->{categories_tags}}, "all"; + + foreach my $category (@{$product_ref->{categories_tags}}) { + + # Go through all packaging components + if (not defined $product_ref->{packagings}) { + $product_ref->{packagings} = []; + } + + foreach my $packaging_ref (@{$product_ref->{packagings}}) { + my $shape = $packaging_ref->{shape} || "en:unknown"; + my $material = $packaging_ref->{material} || "en:unknown"; + + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material)) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", "all", "materials", $material)) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all")) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", "all", "materials", "all")) + += 1; + + my @shape_parents = gen_tags_hierarchy_taxonomy("en", "packaging_shapes", $shape); + my @material_parents = gen_tags_hierarchy_taxonomy("en", "packaging_materials", $material); + + # Also add stats to parent materials + foreach my $material_parent (@material_parents, "all") { + deep_val( + $packagings_stats_ref, + ( + "countries", $country, "categories", $category, + "shapes", $shape, "materials_parents", $material_parent + ) + ) += 1; + deep_val( + $packagings_stats_ref, + ( + "countries", $country, "categories", $category, + "shapes", "all", "materials_parents", $material_parent + ) + ) += 1; + } + } + } + + } - $cursor->immortal(1); - - my $total = 0; - - my $packagings_stats_ref = {}; - - # Go through all products - while (my $product_ref = $cursor->next) { - $total++; - - if ($total % 1000 == 0) { - print STDERR "$name: $total / $products_count processed\n"; } - # Generate stats for all countries + en:world (products from all countries) - if (not defined $product_ref->{countries_tags}) { - $product_ref->{countries_tags} = []; - } - push @{$product_ref->{countries_tags}}, "en:world"; - - foreach my $country (@{$product_ref->{countries_tags}}) { - - # Generate stats for all categories + all (products from all categories) - if (not defined $product_ref->{categories_tags}) { - $product_ref->{categories_tags} = []; - } - push @{$product_ref->{categories_tags}}, "all"; - - foreach my $category (@{$product_ref->{categories_tags}}) { - - # Go through all packaging components - if (not defined $product_ref->{packagings}) { - $product_ref->{packagings} = []; - } - - foreach my $packaging_ref (@{$product_ref->{packagings}}) { - my $shape = $packaging_ref->{shape} || "en:unknown"; - my $material = $packaging_ref->{material} || "en:unknown"; - - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", $material )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all" )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials", "all" )) += 1; - - my @shape_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_shapes", $shape); - my @material_parents = gen_tags_hierarchy_taxonomy ("en", "packaging_materials", $material); - - # Also add stats to parent materials - foreach my $material_parent (@material_parents, "all") { - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", $shape, "materials_parents", $material_parent )) += 1; - deep_val($packagings_stats_ref, ("countries", $country, "categories", $category, "shapes", "all", "materials_parents", $material_parent )) += 1; - } - } - } - - } - - } - - # Create directories for the output if they do not exist yet + # Create directories for the output if they do not exist yet - (-e "$data_root/data") - or mkdir("$data_root/data", oct(755)) - or die("Could not create target directory $data_root/data : $!\n"); - (-e "$data_root/data/categories_stats") - or mkdir("$data_root/data/categories_stats", oct(755)) - or die("Could not create target directory $data_root/data/categories_stats : $!\n"); - (-e "$www_root/data/categories_stats") - or mkdir("$www_root/data/categories_stats", oct(755)) - or die("Could not create target directory $www_root/data/categories_stats : $!\n"); + (-e "$data_root/data") + or mkdir("$data_root/data", oct(755)) + or die("Could not create target directory $data_root/data : $!\n"); + (-e "$data_root/data/categories_stats") + or mkdir("$data_root/data/categories_stats", oct(755)) + or die("Could not create target directory $data_root/data/categories_stats : $!\n"); + (-e "$www_root/data/categories_stats") + or mkdir("$www_root/data/categories_stats", oct(755)) + or die("Could not create target directory $www_root/data/categories_stats : $!\n"); - # Perl structure in .sto format + # Perl structure in .sto format - store("$data_root/data/categories_stats/categories_packagings_stats.$name.sto", $packagings_stats_ref); + store("$data_root/data/categories_stats/categories_packagings_stats.$name.sto", $packagings_stats_ref); - # JSON + # JSON - binmode STDOUT, ":encoding(UTF-8)"; - if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.$name.json")) { - print $JSON encode_json($packagings_stats_ref); - close($JSON); - } + binmode STDOUT, ":encoding(UTF-8)"; + if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.$name.json")) { + print $JSON encode_json($packagings_stats_ref); + close($JSON); + } } - generate_packaging_stats_for_query("packagings-with-weights", {misc_tags => 'en:packagings-with-weights'}); generate_packaging_stats_for_query("all", {}); From b623837c8a7842fba8ad8cf77c778abaae20ca8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 12:02:23 +0100 Subject: [PATCH 06/13] return --- scripts/gen_packaging_stats.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index b9f783b1989b5..0212f366f417d 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -210,6 +210,7 @@ ($name, $query_ref) close($JSON); } + return; } generate_packaging_stats_for_query("packagings-with-weights", {misc_tags => 'en:packagings-with-weights'}); From efadb754d0af4b2a34417a26cf3265c5044e8fd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 15:41:27 +0100 Subject: [PATCH 07/13] Update scripts/gen_packaging_stats.pl Co-authored-by: Alex Garel --- scripts/gen_packaging_stats.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index 0212f366f417d..a65b1e0535877 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -92,6 +92,7 @@ ($name, $query_ref) $query_ref->{'empty'} = {"\$ne" => 1}; $query_ref->{'obsolete'} = {"\$ne" => 1}; + # fields to retrieve my $fields_ref = { countries_tags => 1, categories_tags => 1, From 7cfe69e6144389a79ca27a6b1a475d1c744d059c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 15:41:33 +0100 Subject: [PATCH 08/13] Update scripts/gen_packaging_stats.pl Co-authored-by: Alex Garel --- scripts/gen_packaging_stats.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index a65b1e0535877..31ebe0950a7c1 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -89,6 +89,7 @@ =head4 query reference $query_ref sub generate_packaging_stats_for_query ($name, $query_ref) { + # we will filter out empty and obsolet products $query_ref->{'empty'} = {"\$ne" => 1}; $query_ref->{'obsolete'} = {"\$ne" => 1}; From c0ce937db2a8d34372dac8485963f8de112870c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 15:42:25 +0100 Subject: [PATCH 09/13] Update scripts/gen_packaging_stats.pl Co-authored-by: Alex Garel --- scripts/gen_packaging_stats.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index 31ebe0950a7c1..ce6dc6675f1b2 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -124,6 +124,7 @@ ($name, $query_ref) } # Generate stats for all countries + en:world (products from all countries) + # add a virtual en:world country to every products if (not defined $product_ref->{countries_tags}) { $product_ref->{countries_tags} = []; } From 3b234f771a6387f345248c9548f969cd72389487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Fri, 6 Jan 2023 17:51:06 +0100 Subject: [PATCH 10/13] changes from code review --- scripts/gen_packaging_stats.pl | 213 +++++++++++++++++++-------------- 1 file changed, 122 insertions(+), 91 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index ce6dc6675f1b2..882241553dd17 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -69,6 +69,124 @@ =head1 DESCRIPTION use File::Path qw(mkpath); use JSON::PP; use Data::DeepAccess qw(deep_exists deep_get deep_set deep_val); +use Getopt::Long; + +my $quiet; + +GetOptions("quiet" => \$quiet) + or die("Error in command line arguments: use --quiet to silence progress messages"); + +=head2 add_product_to_stats($packagings_stats_ref, $product_ref) + +Add data from all packagings of a product to stats for all its countries and categories combinations. + +=cut + +sub add_product_to_stats ($packagings_stats_ref, $product_ref) { + + # Generate stats for all countries + en:world (products from all countries) + # add a virtual en:world country to every products + if (not defined $product_ref->{countries_tags}) { + $product_ref->{countries_tags} = []; + } + push @{$product_ref->{countries_tags}}, "en:world"; + + # Generate stats for all categories + all (products from all categories) + if (not defined $product_ref->{categories_tags}) { + $product_ref->{categories_tags} = []; + } + push @{$product_ref->{categories_tags}}, "all"; + + # Go through all packaging components + if (not defined $product_ref->{packagings}) { + $product_ref->{packagings} = []; + } + + foreach my $packaging_ref (@{$product_ref->{packagings}}) { + my $shape = $packaging_ref->{shape} || "en:unknown"; + my $material = $packaging_ref->{material} || "en:unknown"; + + my @shape_parents = gen_tags_hierarchy_taxonomy("en", "packaging_shapes", $shape); + my @material_parents = gen_tags_hierarchy_taxonomy("en", "packaging_materials", $material); + + # Go through all countries + foreach my $country (@{$product_ref->{countries_tags}}) { + + # Go through all categories + foreach my $category (@{$product_ref->{categories_tags}}) { + + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material)) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", "all", "materials", $material)) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all")) + += 1; + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, "shapes", "all", "materials", "all")) + += 1; + + # Also add stats to parent materials + foreach my $material_parent (@material_parents, "all") { + deep_val( + $packagings_stats_ref, + ( + "countries", $country, "categories", $category, + "shapes", $shape, "materials_parents", $material_parent + ) + ) += 1; + deep_val( + $packagings_stats_ref, + ( + "countries", $country, "categories", $category, + "shapes", "all", "materials_parents", $material_parent + ) + ) += 1; + } + } + } + } + + return; +} + +=head2 store_stats($name, $packagings_stats_ref) + +Store the stats in .sto format for internal use in Product Opener, +and in JSON in /html/data for external use. + +=cut + +sub store_stats ($name, $packagings_stats_ref) { + + # Create directories for the output if they do not exist yet + + (-e "$data_root/data") + or mkdir("$data_root/data", oct(755)) + or die("Could not create target directory $data_root/data : $!\n"); + (-e "$data_root/data/categories_stats") + or mkdir("$data_root/data/categories_stats", oct(755)) + or die("Could not create target directory $data_root/data/categories_stats : $!\n"); + (-e "$www_root/data/categories_stats") + or mkdir("$www_root/data/categories_stats", oct(755)) + or die("Could not create target directory $www_root/data/categories_stats : $!\n"); + + # Perl structure in .sto format + + store("$data_root/data/categories_stats/categories_packagings_stats.$name.sto", $packagings_stats_ref); + + # JSON + + binmode STDOUT, ":encoding(UTF-8)"; + if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.$name.json")) { + print $JSON encode_json($packagings_stats_ref); + close($JSON); + } + + return; +} =head2 generate_packaging_stats_for_query($name, $query_ref) @@ -81,8 +199,6 @@ =head3 Arguments =head4 name $name - - =head4 query reference $query_ref =cut @@ -105,7 +221,7 @@ ($name, $query_ref) my $products_count = $products_collection->count_documents($query_ref); - print STDERR "$name: $products_count products\n"; + $quiet or print STDERR "$name: $products_count products\n"; my $cursor = $products_collection->query($query_ref)->sort({created_t => 1})->fields($fields_ref); @@ -120,98 +236,13 @@ ($name, $query_ref) $total++; if ($total % 1000 == 0) { - print STDERR "$name: $total / $products_count processed\n"; - } - - # Generate stats for all countries + en:world (products from all countries) - # add a virtual en:world country to every products - if (not defined $product_ref->{countries_tags}) { - $product_ref->{countries_tags} = []; - } - push @{$product_ref->{countries_tags}}, "en:world"; - - foreach my $country (@{$product_ref->{countries_tags}}) { - - # Generate stats for all categories + all (products from all categories) - if (not defined $product_ref->{categories_tags}) { - $product_ref->{categories_tags} = []; - } - push @{$product_ref->{categories_tags}}, "all"; - - foreach my $category (@{$product_ref->{categories_tags}}) { - - # Go through all packaging components - if (not defined $product_ref->{packagings}) { - $product_ref->{packagings} = []; - } - - foreach my $packaging_ref (@{$product_ref->{packagings}}) { - my $shape = $packaging_ref->{shape} || "en:unknown"; - my $material = $packaging_ref->{material} || "en:unknown"; - - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material)) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", "all", "materials", $material)) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all")) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", "all", "materials", "all")) - += 1; - - my @shape_parents = gen_tags_hierarchy_taxonomy("en", "packaging_shapes", $shape); - my @material_parents = gen_tags_hierarchy_taxonomy("en", "packaging_materials", $material); - - # Also add stats to parent materials - foreach my $material_parent (@material_parents, "all") { - deep_val( - $packagings_stats_ref, - ( - "countries", $country, "categories", $category, - "shapes", $shape, "materials_parents", $material_parent - ) - ) += 1; - deep_val( - $packagings_stats_ref, - ( - "countries", $country, "categories", $category, - "shapes", "all", "materials_parents", $material_parent - ) - ) += 1; - } - } - } - + $quiet or print STDERR "$name: $total / $products_count processed\n"; } + add_product_to_stats($packagings_stats_ref, $product_ref); } - # Create directories for the output if they do not exist yet - - (-e "$data_root/data") - or mkdir("$data_root/data", oct(755)) - or die("Could not create target directory $data_root/data : $!\n"); - (-e "$data_root/data/categories_stats") - or mkdir("$data_root/data/categories_stats", oct(755)) - or die("Could not create target directory $data_root/data/categories_stats : $!\n"); - (-e "$www_root/data/categories_stats") - or mkdir("$www_root/data/categories_stats", oct(755)) - or die("Could not create target directory $www_root/data/categories_stats : $!\n"); - - # Perl structure in .sto format - - store("$data_root/data/categories_stats/categories_packagings_stats.$name.sto", $packagings_stats_ref); - - # JSON - - binmode STDOUT, ":encoding(UTF-8)"; - if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.$name.json")) { - print $JSON encode_json($packagings_stats_ref); - close($JSON); - } + store_stats($name, $packagings_stats_ref); return; } From cbd401015c4db4c539e34fd3373592ee66983db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Tue, 10 Jan 2023 12:32:51 +0100 Subject: [PATCH 11/13] stats for shape parents + special file for fr yogurts --- scripts/gen_packaging_stats.pl | 67 ++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index 882241553dd17..c9404787e6602 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -109,42 +109,40 @@ ($packagings_stats_ref, $product_ref) my @shape_parents = gen_tags_hierarchy_taxonomy("en", "packaging_shapes", $shape); my @material_parents = gen_tags_hierarchy_taxonomy("en", "packaging_materials", $material); + # We will generate stats for both shapes and shapes parents + my @shape_or_shape_parents = ( + ["shape", [$shape, "all"]], + ["shape_parents", [@shape_parents, "all"]] + ); + # Go through all countries foreach my $country (@{$product_ref->{countries_tags}}) { - # Go through all categories + # Go through all categories (note: the product categories already contain all parent categories) foreach my $category (@{$product_ref->{categories_tags}}) { - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", $shape, "materials", $material)) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", "all", "materials", $material)) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", $shape, "materials", "all")) - += 1; - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, "shapes", "all", "materials", "all")) - += 1; - - # Also add stats to parent materials - foreach my $material_parent (@material_parents, "all") { - deep_val( - $packagings_stats_ref, - ( - "countries", $country, "categories", $category, - "shapes", $shape, "materials_parents", $material_parent - ) - ) += 1; - deep_val( - $packagings_stats_ref, - ( - "countries", $country, "categories", $category, - "shapes", "all", "materials_parents", $material_parent - ) - ) += 1; - } + foreach my $shape_or_shape_parents_ref (@shape_or_shape_parents) { + my ($shape_or_shape_parents, $shapes_ref) = @$shape_or_shape_parents_ref; + + foreach my $shape_value (@$shapes_ref) { + foreach my $material_value ($material, "all") { + deep_val($packagings_stats_ref, + ("countries", $country, "categories", $category, $shape_or_shape_parents, $shape_value, "materials", $material_value)) + += 1; + } + + # Also add stats to parent materials + foreach my $material_parent_value (@material_parents, "all") { + deep_val( + $packagings_stats_ref, + ( + "countries", $country, "categories", $category, + $shape_or_shape_parents, $shape_value, "materials_parents", $material_parent_value + ) + ) += 1; + } + } + } } } } @@ -185,6 +183,13 @@ ($name, $packagings_stats_ref) close($JSON); } + # special export for French yogurts for the "What's around my yogurt?" operation in January 2023 + # https://fr.openfoodfacts.org/categorie/desserts-lactes-fermentes/misc/en:packagings-with-weights + if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.fr.fermented-dairy-desserts.$name.json")) { + print $JSON encode_json($packagings_stats_ref->{countries}{"en:france"}{categories}{"en:fermented-dairy-desserts"}); + close($JSON); + } + return; } From efa9c6cfb819cf49978e7ea641982c388b5eb5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Tue, 10 Jan 2023 15:07:38 +0100 Subject: [PATCH 12/13] weight stats --- scripts/gen_packaging_stats.pl | 158 ++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 34 deletions(-) diff --git a/scripts/gen_packaging_stats.pl b/scripts/gen_packaging_stats.pl index c9404787e6602..d16c64a94e997 100755 --- a/scripts/gen_packaging_stats.pl +++ b/scripts/gen_packaging_stats.pl @@ -76,13 +76,15 @@ =head1 DESCRIPTION GetOptions("quiet" => \$quiet) or die("Error in command line arguments: use --quiet to silence progress messages"); -=head2 add_product_to_stats($packagings_stats_ref, $product_ref) +=head2 add_product_to_stats($name, $packagings_stats_ref, $product_ref) Add data from all packagings of a product to stats for all its countries and categories combinations. +When $name is "packagings-with-weights", we store stats for weights, otherwise, we store only the number of products. + =cut -sub add_product_to_stats ($packagings_stats_ref, $product_ref) { +sub add_product_to_stats ($name, $packagings_stats_ref, $product_ref) { # Generate stats for all countries + en:world (products from all countries) # add a virtual en:world country to every products @@ -105,15 +107,17 @@ ($packagings_stats_ref, $product_ref) foreach my $packaging_ref (@{$product_ref->{packagings}}) { my $shape = $packaging_ref->{shape} || "en:unknown"; my $material = $packaging_ref->{material} || "en:unknown"; + my $weight = $packaging_ref->{weight_measured}; my @shape_parents = gen_tags_hierarchy_taxonomy("en", "packaging_shapes", $shape); my @material_parents = gen_tags_hierarchy_taxonomy("en", "packaging_materials", $material); - # We will generate stats for both shapes and shapes parents - my @shape_or_shape_parents = ( - ["shape", [$shape, "all"]], - ["shape_parents", [@shape_parents, "all"]] - ); + # We will generate stats for both shapes and shapes parents + my @shapes_or_shapes_parents = (["shapes", [$shape, "all"]], ["shapes_parents", [@shape_parents, "all"]]); + + # We will generate stats for both materials and materials parents + my @materials_or_materials_parents + = (["materials", [$material, "all"]], ["materials_parents", [@material_parents, "all"]]); # Go through all countries foreach my $country (@{$product_ref->{countries_tags}}) { @@ -121,28 +125,44 @@ ($packagings_stats_ref, $product_ref) # Go through all categories (note: the product categories already contain all parent categories) foreach my $category (@{$product_ref->{categories_tags}}) { - foreach my $shape_or_shape_parents_ref (@shape_or_shape_parents) { - my ($shape_or_shape_parents, $shapes_ref) = @$shape_or_shape_parents_ref; - - foreach my $shape_value (@$shapes_ref) { - foreach my $material_value ($material, "all") { - deep_val($packagings_stats_ref, - ("countries", $country, "categories", $category, $shape_or_shape_parents, $shape_value, "materials", $material_value)) - += 1; - } - - # Also add stats to parent materials - foreach my $material_parent_value (@material_parents, "all") { - deep_val( - $packagings_stats_ref, - ( - "countries", $country, "categories", $category, - $shape_or_shape_parents, $shape_value, "materials_parents", $material_parent_value - ) - ) += 1; - } - } - } + # Compute stats for shapes + shapes parents + foreach my $shapes_or_shapes_parents_ref (@shapes_or_shapes_parents) { + my ($shapes_or_shapes_parents, $shapes_ref) = @$shapes_or_shapes_parents_ref; + + foreach my $shape_value (@$shapes_ref) { + + # Compute stats for materials + materials parents + foreach my $materials_or_materials_parents_ref (@materials_or_materials_parents) { + my ($materials_or_materials_parents, $materials_ref) = @$materials_or_materials_parents_ref; + + foreach my $material_value (@$materials_ref) { + + deep_val( + $packagings_stats_ref, + ( + "countries", $country, + "categories", $category, + $shapes_or_shapes_parents, $shape_value, + $materials_or_materials_parents, $material_value, + "n" + ) + ) += 1; + if (($name eq "packagings-with-weights") and (defined $weight)) { + deep_val( + $packagings_stats_ref, + ( + "countries", $country, + "categories", $category, + $shapes_or_shapes_parents, $shape_value, + $materials_or_materials_parents, $material_value, + "weights", "values" + ) + ) .= $weight . ','; + } + } + } + } + } } } } @@ -150,6 +170,64 @@ ($packagings_stats_ref, $product_ref) return; } +=head2 compute_stats_for_all_weights ($packagings_stats_ref) + +Add data from all packagings of a product to stats for all its countries and categories combinations. + +=cut + +sub compute_stats_for_all_weights ($packagings_stats_ref) { + + # Individual weights are stored in a nested hash with this structure: + # ("countries", $country, "categories", $category, $shapes_or_shapes_parents, $shape_value, $materials_or_materials_parents, $material_value, "weights", "values")) + + foreach my $country_ref (values %{$packagings_stats_ref->{countries}}) { + foreach my $category_ref (values %{$country_ref->{categories}}) { + foreach my $shapes_or_shapes_parents_ref (values %$category_ref) { + foreach my $shape_ref (values %$shapes_or_shapes_parents_ref) { + foreach my $materials_or_materials_parents_ref (values %$shape_ref) { + foreach my $material_ref (values %$materials_or_materials_parents_ref) { + if (defined $material_ref->{weights}) { + compute_stats_for_weights($material_ref->{weights}); + } + } + } + } + } + } + } + + return; +} + +=head2 compute_stats_for_weights ($weights_ref) + +Compute stats for weight values passed in $weights_ref->{values} in comma delimited format + +=cut + +sub compute_stats_for_weights ($weights_ref) { + + # Remove trailing comma + $weights_ref->{values} =~ s/,$//; + # Turn to array + $weights_ref->{values} = [split(/,/, $weights_ref->{values})]; + + $weights_ref->{n} = 0; + $weights_ref->{sum} = 0; + + foreach my $value (@{$weights_ref->{values}}) { + $weights_ref->{n}++; + $weights_ref->{sum} += $value; + } + + if ($weights_ref->{n} > 0) { + $weights_ref->{mean} = $weights_ref->{sum} / $weights_ref->{n}; + } + + return; +} + =head2 store_stats($name, $packagings_stats_ref) Store the stats in .sto format for internal use in Product Opener, @@ -183,10 +261,17 @@ ($name, $packagings_stats_ref) close($JSON); } - # special export for French yogurts for the "What's around my yogurt?" operation in January 2023 - # https://fr.openfoodfacts.org/categorie/desserts-lactes-fermentes/misc/en:packagings-with-weights - if (open(my $JSON, ">", "$www_root/data/categories_stats/categories_packagings_stats.fr.fermented-dairy-desserts.$name.json")) { - print $JSON encode_json($packagings_stats_ref->{countries}{"en:france"}{categories}{"en:fermented-dairy-desserts"}); + # special export for French yogurts for the "What's around my yogurt?" operation in January 2023 + # https://fr.openfoodfacts.org/categorie/desserts-lactes-fermentes/misc/en:packagings-with-weights + if ( + open( + my $JSON, ">", + "$www_root/data/categories_stats/categories_packagings_stats.fr.fermented-dairy-desserts.$name.json" + ) + ) + { + print $JSON encode_json( + $packagings_stats_ref->{countries}{"en:france"}{categories}{"en:fermented-dairy-desserts"}); close($JSON); } @@ -244,7 +329,12 @@ ($name, $query_ref) $quiet or print STDERR "$name: $total / $products_count processed\n"; } - add_product_to_stats($packagings_stats_ref, $product_ref); + add_product_to_stats($name, $packagings_stats_ref, $product_ref); + } + + # Compute stats for weights + if ($name eq "packagings-with-weights") { + compute_stats_for_all_weights($packagings_stats_ref); } store_stats($name, $packagings_stats_ref); From d9ee9a97626217f2c00a9790809c46c38bc085b6 Mon Sep 17 00:00:00 2001 From: Pierre Slamich Date: Sat, 14 Jan 2023 17:22:37 +0100 Subject: [PATCH 13/13] Update labeler.yml --- .github/labeler.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/labeler.yml b/.github/labeler.yml index bcb6d87951ce4..ba6c2a76a6d5d 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -428,6 +428,7 @@ Packaging: - templates/api/knowledge-panels/environment/ecoscore/packaging.tt.json - tests/unit/packaging.t - docs/reference/schemas/packagings/** +- scripts/gen_packaging_stats.pl producers platform: - lib/ProductOpener/GS1.pm