From 37f9408a84f8a7b6251629500e7f56bc4a31bfc2 Mon Sep 17 00:00:00 2001 From: Carolyn Au Date: Thu, 20 Feb 2025 15:09:31 -0800 Subject: [PATCH] Add embeddings for a few economics indicators --- deploy/nl/catalog.yaml | 2 +- .../whatisthephylumofvolvox/debug_info.json | 4 +- .../debug_info.json | 8 +-- .../debug_info.json | 24 ++++----- .../compareobesityvs.poverty/debug_info.json | 6 +-- .../debug_info.json | 24 ++++----- .../debug_info.json | 50 +++++++++---------- .../debug_info.json | 20 ++++---- tools/nl/embeddings/input/base/_preindex.csv | 12 +++++ tools/nl/embeddings/input/base/sheets_svs.csv | 7 ++- 10 files changed, 87 insertions(+), 70 deletions(-) diff --git a/deploy/nl/catalog.yaml b/deploy/nl/catalog.yaml index 137497ae98..891c52a289 100644 --- a/deploy/nl/catalog.yaml +++ b/deploy/nl/catalog.yaml @@ -84,7 +84,7 @@ indexes: base_uae_mem: store_type: MEMORY source_path: ../../tools/nl/embeddings/input/base - embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_08_06_15_55_55/embeddings.csv + embeddings_path: gs://datcom-nl-models/base_uae_mem_2025_02_20_14_15_21/embeddings.csv model: uae-large-v1-model healthcheck_query: "Life expectancy" base_mistral_mem: diff --git a/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json b/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json index 1f9093288c..c0a8b9ffe2 100644 --- a/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json +++ b/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json @@ -24,7 +24,7 @@ 0.314266, 0.312765, 0.312629, - 0.312504, + 0.312505, 0.311774, 0.310339, 0.310075, @@ -83,7 +83,7 @@ 0.248279, 0.247354, 0.24662, - 0.246164, + 0.246163, 0.238985, 0.231355, 0.230288, diff --git a/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json 
b/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json index 19d81bc5cc..d90ccd9114 100644 --- a/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json +++ b/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json @@ -17,9 +17,9 @@ 0.419815, 0.419221, 0.413667, - 0.409653, + 0.409652, 0.406611, - 0.401372, + 0.401371, 0.401019, 0.400638, 0.398746, @@ -27,7 +27,7 @@ 0.390524, 0.389033, 0.388461, - 0.388383, + 0.388382, 0.38733, 0.386422, 0.385896, @@ -91,7 +91,7 @@ 0.384802, 0.341795, 0.324578, - 0.322011, + 0.32201, 0.320837 ], "PROP": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json index 9871ea9321..aba5931ec3 100644 --- a/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json @@ -11,7 +11,7 @@ 0.918759, 0.859054, 0.835409, - 0.782313, + 0.782314, 0.777319, 0.776289, 0.766327, @@ -27,23 +27,23 @@ 0.753294, 0.750956, 0.748258, - 0.745146, + 0.745145, 0.744395, - 0.743155, - 0.74267, + 0.743156, + 0.742669, 0.73966, - 0.737153, + 0.737154, 0.7366, 0.734715, 0.733652, 0.732608, 0.732516, - 0.7303, - 0.729927, + 0.730299, + 0.729926, 0.728912, - 0.728903, - 0.728072, - 0.725959, + 0.728904, + 0.728073, + 0.725958, 0.725933 ], "MultiSV": { @@ -54,7 +54,7 @@ "Parts": [ { "CosineScore": [ - 0.925786 + 0.925785 ], "QueryPart": "male population", "SV": [ @@ -89,7 +89,7 @@ }, { "CosineScore": [ - 0.898238, + 0.898237, 0.872347 ], "QueryPart": "population", diff --git 
a/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json index fc5c7ca1eb..1568745bee 100644 --- a/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json @@ -27,14 +27,14 @@ 0.712695, 0.709485, 0.708555, - 0.708555, + 0.708554, 0.707882, 0.707771, 0.704539, 0.702431, 0.700454, 0.699499, - 0.695671, + 0.69567, 0.694791, 0.691324 ], @@ -49,7 +49,7 @@ 0.846783, 0.831656, 0.820154, - 0.817706 + 0.817707 ], "QueryPart": "obesity", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json index 54add40913..3da8c3e1bd 100644 --- a/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json @@ -10,12 +10,12 @@ "CosineScore": [ 0.786479, 0.713557, - 0.691805, - 0.674091, + 0.691806, + 0.674092, 0.673459, 0.670462, 0.663511, - 0.659392, + 0.659393, 0.652018, 0.648482, 0.64617, @@ -26,15 +26,15 @@ 0.640211, 0.638421, 0.638034, - 0.637646, - 0.634638, + 0.637647, + 0.634637, 0.634146, 0.633848, 0.63227, 0.632221, 0.630495, 0.630297, - 0.629836, + 0.629835, 0.629568 ], "MultiSV": { @@ -69,11 +69,11 @@ "Parts": [ { "CosineScore": [ - 0.758862, + 0.758863, 0.746364, - 0.740166, + 0.740165, 0.735123, - 0.713099 + 0.7131 ], "QueryPart": "factors like obesity", "SV": [ @@ -113,8 +113,8 @@ { "CosineScore": [ 
0.776843, - 0.749505, - 0.734265, + 0.749504, + 0.734266, 0.732511 ], "QueryPart": "factors like obesity blood pressure asthma", @@ -142,7 +142,7 @@ "Parts": [ { "CosineScore": [ - 0.820791 + 0.82079 ], "QueryPart": "factors like obesity blood pressure", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json index 08ba6884a9..aca640ac56 100644 --- a/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json @@ -8,44 +8,44 @@ "query_with_places_removed": "number of poor hispanic women with phd", "sv_matching": { "CosineScore": [ - 0.79858, + 0.798579, 0.788326, 0.776347, - 0.774062, - 0.74103, - 0.733467, - 0.722828, + 0.774063, + 0.741029, + 0.733466, + 0.722829, 0.722569, 0.699547, 0.699502, 0.697618, 0.697434, - 0.696004, - 0.695343, + 0.696003, + 0.695344, 0.694517, 0.693964, - 0.693536, - 0.690841, + 0.693537, + 0.69084, 0.690697, 0.689673, 0.688952, - 0.68862, + 0.688619, 0.686329, 0.685918, 0.682754, 0.682214, - 0.681562, - 0.679875, + 0.681563, + 0.679874, 0.676856, 0.676309, - 0.675154, + 0.675155, 0.674499, 0.672567, - 0.671797, + 0.671798, 0.669168, 0.669001, 0.668218, - 0.66781, + 0.667811, 0.665069, 0.66487 ], @@ -59,10 +59,10 @@ "CosineScore": [ 0.831177, 0.831001, - 0.825515, + 0.825516, 0.819551, - 0.81456, - 0.809066, + 0.814561, + 0.809067, 0.808639, 0.80744, 0.800295, @@ -104,7 +104,7 @@ { "CosineScore": [ 0.836672, - 0.797269 + 0.79727 ], "QueryPart": "number of poor", "SV": [ @@ -114,9 +114,9 @@ }, { "CosineScore": [ - 0.815151, - 0.7886, - 0.777555, + 0.81515, + 0.788599, + 0.777556, 0.775205 ], "QueryPart": "hispanic women phd", @@ -135,8 +135,8 @@ "Parts": [ { "CosineScore": [ - 0.849594, - 0.833501 + 
0.849593, + 0.833502 ], "QueryPart": "number of poor hispanic women", "SV": [ @@ -148,7 +148,7 @@ "CosineScore": [ 0.764697, 0.747669, - 0.737707, + 0.737706, 0.736277, 0.730588 ], diff --git a/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json index 63aad960da..56c5a34f93 100644 --- a/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json @@ -8,21 +8,21 @@ "query_with_places_removed": "show me the impact of climate change on drought", "sv_matching": { "CosineScore": [ - 0.839368, + 0.839369, 0.793112, - 0.769997, - 0.720811, + 0.769998, + 0.72081, 0.717531, 0.699544, - 0.686346, - 0.674247, - 0.67305, + 0.686345, + 0.674246, + 0.673051, 0.670354, 0.665289, 0.664461, 0.635379, - 0.634993, - 0.634701, + 0.634994, + 0.6347, 0.630818, 0.629086, 0.625659, @@ -41,7 +41,7 @@ "Parts": [ { "CosineScore": [ - 0.878749 + 0.878748 ], "QueryPart": "show climate change", "SV": [ @@ -66,7 +66,7 @@ { "CosineScore": [ 0.794237, - 0.792901 + 0.792902 ], "QueryPart": "show climate", "SV": [ diff --git a/tools/nl/embeddings/input/base/_preindex.csv b/tools/nl/embeddings/input/base/_preindex.csv index f0b65c9b2d..f0b340e497 100644 --- a/tools/nl/embeddings/input/base/_preindex.csv +++ b/tools/nl/embeddings/input/base/_preindex.csv @@ -250,6 +250,9 @@ Amounts provided and mobilized in United States dollars per year in relation to "Annual amount of volatile organic compound emissions from non-biogenic, miscellaneous emission sources",Annual_Amount_Emissions_EPAMiscellaneousEmissionSource_NonBiogenicEmissionSource_VolatileOrganicCompound Annual growth rate of real GDP per capita,dc/topic/sdg_8.1.1 Annual growth rate of real GDP per 
employed person,dc/topic/sdg_8.2.1 +Annual inflation rate as measured by the consumer price index,sdg/FP_CPI_TOTL_ZG +Annual inflation rate consumer price index,sdg/FP_CPI_TOTL_ZG +"Annual inflation, consumer prices",sdg/FP_CPI_TOTL_ZG Annual mean levels of fine particulate matter (e.g. PM2.5 and PM10) in cities (population weighted),dc/topic/sdg_11.6.2 Area of Farm Growing Corn for Silage Or Greenchop,Area_Farm_CornForSilageOrGreenchop Area of Farm Growing Cotton,Area_Farm_Cotton @@ -489,6 +492,8 @@ Commute Time,dc/topic/CommuteTime Concentration of Smoke PM2.5,Concentration_AirPollutant_SmokePM25 Condition of the economy,dc/topic/Economy "Conflict-related deaths per 100K population, by sex, age and cause",dc/topic/sdg_16.1.2 +Consumer Price Index for All Urban Consumers,dc/x6l3mg60hpe7c +Consumer Price Index of Consumer Goods And Services (Seasonally Unadjusted): Urban Consumer,dc/x6l3mg60hpe7c Consumption of Anthracite Coal by the Manufacturing Industry,Annual_Consumption_Fuel_Manufacturing_AnthraciteCoal Consumption of Aviation Gasoline,Annual_Consumption_Fuel_AviationGasoline Consumption of Aviation Gasoline by International Aviation Bunkers,Annual_Consumption_Fuel_InternationalAviationBunkers_AviationGasoline @@ -648,6 +653,8 @@ Degree of sustainable public procurement policies and action plan implementation Demographics,dc/topic/Demographics Dengue,dc/topic/Dengue Dentists,dc/topic/Dentists +Deposit interest rate,worldBank/FR_INR_DPST +"Deposit interest rate is the rate paid by commercial or similar banks for demand, time, or savings deposits",worldBank/FR_INR_DPST Develop and implement tools to monitor sustainable development impacts for sustainable tourism that creates jobs and promotes local culture and products,dc/topic/sdg_12.b "Develop effective, accountable and transparent institutions at all levels",dc/topic/sdg_16.6 "Develop quality, reliable, sustainable and resilient infrastructure, including regional and trans-border infrastructure, to support 
economic development and human well-being, with a focus on affordable and equitable access for all",dc/topic/sdg_9.1 @@ -1003,6 +1010,9 @@ Land Cover,dc/topic/LandCover Land Use And Coverage,dc/topic/LandUseAndCoverage Landslide,dc/topic/Landslide Language,dc/topic/Language +Lending interest rate,worldBank/FR_INR_LEND +Lending interest rate adjusted for inflation as measured by the GDP deflator,worldBank/FR_INR_RINR +Lending rate is the bank rate that usually meets the short- and medium-term financing needs of the private sector,worldBank/FR_INR_LEND Length of national highway,Length_Transportation_NationalHighway Length of rail road,Length_Transportation_Railroad Length of road,Length_Transportation_Road @@ -2882,6 +2892,7 @@ Percent of land covered by Permanent Water,LandCoverFraction_PermanentWater Percent of land covered by Seasonal Water,LandCoverFraction_SeasonalWater Percent of land covered by Shrubland,LandCoverFraction_Shrubland Percent of land covered by Snow Ice,LandCoverFraction_SnowIce +"Percentage change in the cost to the average consumer of acquiring a basket of goods and services that may be fixed or changed at specified intervals, such as yearly",sdg/FP_CPI_TOTL_ZG Percentage of Bcg Immunization Coverage Among 1 Year Old female,WHO/bcgv_Female Percentage of Bcg Immunization Coverage Among 1 Year Old in Rural Areas,WHO/bcgv_Rural Percentage of Bcg Immunization Coverage Among 1 Year Old in Urban areas,WHO/bcgv_Urban @@ -3122,6 +3133,7 @@ Real Gross Value Added from Manufacturing,Amount_EconomicActivity_GrossValueAdde "Real Gross Value Added from Mining, Manufacturing, and Utilities",Amount_EconomicActivity_GrossValueAdded_ISICMiningManufacturingUtilities_RealValue "Real Gross Value Added from Transport, Storage, and Communications",Amount_EconomicActivity_GrossValueAdded_ISICTransportStorageCommunications_RealValue "Real Gross Value Added from Wholesale, Retail Trade, Restaurants, and 
Hotels",Amount_EconomicActivity_GrossValueAdded_ISICWholesaleRetailTradeRestaurantsHotels_RealValue +Real lending interest rate,worldBank/FR_INR_RINR "Realize timely implementation of duty-free and quota-free market access on a lasting basis for all least developed countries, consistent with World Trade Organization decisions, including by ensuring that preferential rules of origin applicable to imports from least developed countries are transparent and simple, and contribute to facilitating market access",dc/topic/sdg_17.12 Reasons for death,dc/topic/CausesOfDeath Reasons for mortality,dc/topic/CausesOfDeath diff --git a/tools/nl/embeddings/input/base/sheets_svs.csv b/tools/nl/embeddings/input/base/sheets_svs.csv index f494647406..0d39363e13 100644 --- a/tools/nl/embeddings/input/base/sheets_svs.csv +++ b/tools/nl/embeddings/input/base/sheets_svs.csv @@ -3387,6 +3387,7 @@ sdg/ER_PTD_FRHWTR,Average proportion of Freshwater Key Biodiversity Areas covere sdg/ER_PTD_MTN,Average proportion of Mountain Key Biodiversity Areas covered by protected areas sdg/ER_PTD_TERR,Average proportion of Terrestrial Key Biodiversity Areas covered by protected areas sdg/ER_RSK_LST,Red List Index +sdg/FP_CPI_TOTL_ZG,"Annual inflation, consumer prices;Annual inflation rate as measured by the consumer price index;Percentage change in the cost to the average consumer of acquiring a basket of goods and services that may be fixed or changed at specified intervals, such as yearly;Annual inflation rate consumer price index" sdg/SH_FPL_MTMM.AGE--Y15T49__SEX--F,Proportion of females of reproductive age who have their need for family planning satisfied with modern methods sdg/SH_STA_ANEM.AGE--Y15T49__SEX--F,Proportion of females aged 15-49 years with anaemia sdg/SH_STA_MORT.SEX--F,Maternal mortality @@ -3602,7 +3603,11 @@ worldBank/4_1_SHARE_RE_IN_ELECTRICITY,Renewable electricity share of total elect worldBank/EG_ELC_ACCS_RU_ZS,percentage of rural population with access to electricity 
worldBank/EG_ELC_ACCS_UR_ZS,percentage of urban population with access to electricity worldBank/EG_ELC_ACCS_ZS,percentage of population with access to electricity +worldBank/FR_INR_DPST,"Deposit interest rate;Deposit interest rate is the rate paid by commercial or similar banks for demand, time, or savings deposits" +worldBank/FR_INR_LEND,"Lending interest rate;Lending rate is the bank rate that usually meets the short- and medium-term financing needs of the private sector" +worldBank/FR_INR_RINR,"Real lending interest rate;Lending interest rate adjusted for inflation as measured by the GDP deflator" sdg/SG_GEN_PARL.SEX--F,proportion of seats held by women in national parliaments Count_Person_Upto18Years,children population count Count_Person_18OrMoreYears,adult population count -Count_Person_65OrMoreYears,senior population count \ No newline at end of file +Count_Person_65OrMoreYears,senior population count +dc/x6l3mg60hpe7c,"Consumer Price Index of Consumer Goods And Services (Seasonally Unadjusted): Urban Consumer;Consumer Price Index for All Urban Consumers" \ No newline at end of file