diff --git a/app/api.py b/app/api.py index f4afa654..300fecbe 100644 --- a/app/api.py +++ b/app/api.py @@ -10,7 +10,10 @@ from app import config from app._types import SearchResponse from app.config import check_config_is_defined, settings -from app.postprocessing import load_result_processor +from app.postprocessing import ( + load_result_processor, + process_taxonomy_completion_response, +) from app.query import ( build_completion_query, build_elasticsearch_query_builder, @@ -29,14 +32,10 @@ logger.warning("Main configuration is not set, use CONFIG_PATH envvar") FILTER_QUERY_BUILDER = None RESULT_PROCESSOR = None - TAXONOMY_RESULT_PROCESSOR = None else: # we cache query builder and result processor here for faster processing FILTER_QUERY_BUILDER = build_elasticsearch_query_builder(config.CONFIG) RESULT_PROCESSOR = load_result_processor(config.CONFIG.result_processor) - TAXONOMY_RESULT_PROCESSOR = load_result_processor( - config.CONFIG.taxonomy.autocomplete.result_processor - ) app = FastAPI( @@ -168,11 +167,15 @@ def search( ) -@app.get("/taxonomy") +@app.get("/autocomplete") def taxonomy_autocomplete( q: Annotated[str, Query(description="User autocomplete query.")], taxonomy_name: Annotated[ - str, Query(description="Name of the taxonomy to search in.") + list[str], + Query( + description="Name(s) of the taxonomy to search in, pass " + "several time the parameter to search in several taxonomies." + ), ], lang: Annotated[ str, Query(description="Language to search in, defaults to 'en'.") @@ -180,11 +183,10 @@ def taxonomy_autocomplete( size: Annotated[int, Query(description="Number of results to return.")] = 10, ): query = build_completion_query( - q=q, taxonomy_name=taxonomy_name, lang=lang, size=size, config=config.CONFIG + q=q, taxonomy_names=taxonomy_name, lang=lang, size=size, config=config.CONFIG ) - results = query.execute() - - response = TAXONOMY_RESULT_PROCESSOR.process(results) + es_response = query.execute() + response = process_taxonomy_completion_response(es_response) return { **response, diff --git a/app/cli/perform_import.py b/app/cli/perform_import.py index 69e5e106..5ee1faf3 100644 --- a/app/cli/perform_import.py +++ b/app/cli/perform_import.py @@ -272,7 +272,7 @@ def perform_taxonomy_import(config: Config): # we create a temporary index to import to # at the end we will change alias to point to it index_date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f") - next_index = f"{config.taxonomy.autocomplete.index.name}-{index_date}" + next_index = f"{config.taxonomy.index.name}-{index_date}" index = generate_taxonomy_index_object(next_index, config) # create the index @@ -281,4 +281,4 @@ def perform_taxonomy_import(config: Config): import_taxonomies(config, next_index) # make alias point to new index - update_alias(es, next_index, config.taxonomy.autocomplete.index.name) + update_alias(es, next_index, config.taxonomy.index.name) diff --git a/app/config.py b/app/config.py index 19909799..0f87de1b 100644 --- a/app/config.py +++ b/app/config.py @@ -241,26 +241,6 @@ class TaxonomyIndexConfig(BaseModel): ] = 1 -class TaxonomyAutocompleteConfig(BaseModel): - index: Annotated[ - TaxonomyIndexConfig, - Field( - description="configuration of the taxonomy index. There is a single index for all taxonomies." - ), - ] - result_processor: Annotated[ - str, - Field( - description="The full qualified reference to the Elasticsearch result processor " - "to use after search query to Elasticsearch." - ), - ] | None = None - sources: Annotated[ - list[TaxonomySourceConfig], - Field(description="configurations of the taxonomy sources (taxonomy URLs)"), - ] - - class TaxonomyConfig(BaseModel): sources: Annotated[ list[TaxonomySourceConfig], @@ -276,9 +256,11 @@ class TaxonomyConfig(BaseModel): "`taxonomy_langs` field that can be defined in each document." ), ] - autocomplete: Annotated[ - TaxonomyAutocompleteConfig, - Field(description="configuration of taxonomy autocomplete"), + index: Annotated[ + TaxonomyIndexConfig, + Field( + description="configuration of the taxonomy index. There is a single index for all taxonomies." + ), ] diff --git a/app/indexing.py b/app/indexing.py index 33cbf5c4..f233ca84 100644 --- a/app/indexing.py +++ b/app/indexing.py @@ -328,7 +328,18 @@ def generate_taxonomy_mapping_object(config: Config) -> Mapping: Object( required=True, dynamic=False, - properties={lang: Completion() for lang in supported_langs}, + properties={ + lang: Completion( + contexts=[ + { + "name": "taxonomy_name", + "path": "taxonomy_name", + "type": "category", + } + ], + ) + for lang in supported_langs + }, ), ) return mapping @@ -336,7 +347,7 @@ def generate_taxonomy_mapping_object(config: Config) -> Mapping: def generate_taxonomy_index_object(index_name: str, config: Config) -> Index: index = Index(index_name) - taxonomy_index_config = config.taxonomy.autocomplete.index + taxonomy_index_config = config.taxonomy.index index.settings( number_of_shards=taxonomy_index_config.number_of_shards, number_of_replicas=taxonomy_index_config.number_of_replicas, diff --git a/app/postprocessing.py b/app/postprocessing.py index 868383cd..08b2b07e 100644 --- a/app/postprocessing.py +++ b/app/postprocessing.py @@ -55,16 +55,16 @@ def load_result_processor(result_processor: str | None) -> BaseResultProcessor | return result_processor_cls(result_processor) -class CompletionProcessor(BaseResultProcessor): - def process(self, response: Response) -> JSONType: - output = {"took": response.took, "timed_out": response.timed_out} - options = [] - suggestion = response.suggest["taxonomy_suggest"][0] - for option in suggestion.options: - result = { - "id": option._source["id"], - "text": option.text, - } - options.append(result) - output["options"] = options - return output +def process_taxonomy_completion_response(response: Response) -> JSONType: + output = {"took": response.took, "timed_out": response.timed_out} + options = [] + suggestion = response.suggest["taxonomy_suggest"][0] + for option in suggestion.options: + result = { + "id": option._source["id"], + "text": option.text, + "taxonomy_name": option._source["taxonomy_name"], + } + options.append(result) + output["options"] = options + return output diff --git a/app/query.py b/app/query.py index 5e205d43..a92e6cb4 100644 --- a/app/query.py +++ b/app/query.py @@ -253,23 +253,39 @@ def build_search_query( def build_completion_query( - q: str, taxonomy_name: str, lang: str, size: int, config: Config + q: str, + taxonomy_names: list[str], + lang: str, + size: int, + config: Config, + fuzziness: int | None = 2, ): - """Build an elasticsearch_dsl Query. + """Build an elasticsearch_dsl completion Query. - :param q: the user raw query - :param taxonomy_name: the taxonomy we want to search in + :param q: the user autocomplete query + :param taxonomy_names: a list of taxonomies we want to search in :param lang: the language we want search in :param size: number of results to return :param config: configuration to use + :param fuzziness: fuzziness parameter for completion query :return: the built Query """ - query = Search(index=config.taxonomy.autocomplete.index.name) + completion_clause = { + "field": f"names.{lang}", + "size": size, + "contexts": {"taxonomy_name": taxonomy_names}, + } + + if fuzziness is not None: + completion_clause["fuzzy"] = {"fuzziness": fuzziness} + + query = Search(index=config.taxonomy.index.name) query = query.suggest( - "taxonomy_suggest", q, completion={"field": f"names.{lang}", "size": size} + "taxonomy_suggest", + q, + completion=completion_clause, ) - query = query.query("bool", filter=[Q("term", taxonomy_name=taxonomy_name)]) return query diff --git a/data/config/openfoodfacts.yml b/data/config/openfoodfacts.yml index 15db72cc..63aaee48 100644 --- a/data/config/openfoodfacts.yml +++ b/data/config/openfoodfacts.yml @@ -134,6 +134,60 @@ taxonomy: url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json - name: label url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json + - name: additive + url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json + - name: allergen + url: https://static.openfoodfacts.org/data/taxonomies/allergens.full.json + - name: amino_acid + url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json + - name: country + url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json + - name: data_quality + url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json + - name: food_group + url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json + - name: improvement + url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json + - name: ingredient + url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json + - name: ingredients_analysis + url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json + - name: ingredients_processing + url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json + - name: label + url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json + - name: language + url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json + - name: mineral + url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json + - name: misc + url: https://static.openfoodfacts.org/data/taxonomies/misc.full.json + - name: nova_group + url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json + - name: nucleotide + url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json + - name: nutrient + url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json + - name: origin + url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json + - name: other_nutritional_substance + url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json + - name: packaging_material + url: https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json + - name: packaging_recycling + url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json + - name: packaging_shape + url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json + - name: periods_after_opening + url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json + - name: preservation + url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json + - name: state + url: https://static.openfoodfacts.org/data/taxonomies/states.full.json + - name: vitamin + url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json + - name: brand + url: https://static.openfoodfacts.org/data/taxonomies/brands.full.json exported_langs: - en - fr @@ -141,66 +195,9 @@ taxonomy: - de - it - nl - autocomplete: - index: - number_of_replicas: 1 - number_of_shards: 4 - result_processor: app.postprocessing.CompletionProcessor - sources: - - name: additives - url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json - - name: allergens - url: https://static.openfoodfacts.org/data/taxonomies/allergens.full.json - - name: amino_acids - url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json - - name: categories - url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json - - name: countries - url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json - - name: data_quality - url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json - - name: food_groups - url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json - - name: improvements - url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json - - name: ingredients - url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json - - name: ingredients_analysis - url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json - - name: ingredients_processing - url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json - - name: labels - url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json - - name: languages - url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json - - name: minerals - url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json - - name: misc - url: https://static.openfoodfacts.org/data/taxonomies/misc.full.json - - name: nova_groups - url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json - - name: nucleotides - url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json - - name: nutrients - url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json - - name: origins - url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json - - name: other_nutritional_substances - url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json - - name: packaging_materials - url: https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json - - name: packaging_recycling - url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json - - name: packaging_shapes - url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json - - name: periods_after_opening - url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json - - name: preservation - url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json - - name: states - url: https://static.openfoodfacts.org/data/taxonomies/states.full.json - - name: vitamins - url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json + index: + number_of_replicas: 1 + number_of_shards: 4 supported_langs: - aa - ab