From 49ddee47e4d076ed85708e45d13e7bfad1018375 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Sat, 7 Dec 2024 00:17:00 +0300 Subject: [PATCH 1/5] UBI goes distrib Pardon, I barely understand what's going on there. --- .../solr/handler/component/UBIComponent.java | 50 +++++--- .../UBIComponentDistrQueriesTest.java | 110 ++++++++++++++++++ 2 files changed, 147 insertions(+), 13 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java diff --git a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java index 6c2b4b78594..4279a30e322 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java @@ -237,27 +237,44 @@ public void process(ResponseBuilder rb) throws IOException { @Override public int distributedProcess(ResponseBuilder rb) throws IOException { - SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false)) { return ResponseBuilder.STAGE_DONE; } - if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) { - return ResponseBuilder.STAGE_DONE; + if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) { + return ResponseBuilder.STAGE_GET_FIELDS; } - doStuff(rb); + if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { + doDistribStuff(rb); + return ResponseBuilder.STAGE_DONE; + } return ResponseBuilder.STAGE_DONE; } public void doStuff(ResponseBuilder rb) throws IOException { + UBIQuery ubiQuery = getUbiQuery(rb); + if (ubiQuery == null) return; + + ResultContext rc = (ResultContext) rb.rsp.getResponse(); + DocList docs = rc.getDocList(); + // DocList docs = rb.getResults().docList; + + String docIds = extractDocIds(docs, rb.req.getSearcher()); + + ubiQuery.setDocIds(docIds); - // not sure why but sometimes we get it twoice... how can a response have the + addUserBehaviorInsightsToResponse(ubiQuery, rb); + recordQuery(ubiQuery); + } + + private static UBIQuery getUbiQuery(ResponseBuilder rb) { + // not sure why but sometimes we get it tw(o)ice... how can a response have the // the same component run twice? if (rb.rsp.getValues().get("ubi") != null) { - return; + return null; } SolrParams params = rb.req.getParams(); @@ -270,9 +287,9 @@ public void doStuff(ResponseBuilder rb) throws IOException { ubiQuery.setApplication(params.get(APPLICATION)); if (ubiQuery.getApplication() == null) { ubiQuery.setApplication( - rb.isDistrib - ? rb.req.getCloudDescriptor().getCollectionName() - : searcher.getCore().getName()); + rb.isDistrib + ? rb.req.getCloudDescriptor().getCollectionName() + : searcher.getCore().getName()); } String queryAttributes = params.get(QUERY_ATTRIBUTES); @@ -292,12 +309,19 @@ public void doStuff(ResponseBuilder rb) throws IOException { } } } + return ubiQuery; + } + + public void doDistribStuff(ResponseBuilder rb) throws IOException { + + // not sure why but sometimes we get it tw(o)ice... how can a response have the + // the same component run twice? + UBIQuery ubiQuery = getUbiQuery(rb); + if (ubiQuery == null) return; - ResultContext rc = (ResultContext) rb.rsp.getResponse(); - DocList docs = rc.getDocList(); - // DocList docs = rb.getResults().docList; - String docIds = extractDocIds(docs, searcher); + //String docIds = extractDocIds(docs, searcher); + String docIds =String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList()); ubiQuery.setDocIds(docIds); addUserBehaviorInsightsToResponse(ubiQuery, rb); diff --git a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java new file mode 100644 index 00000000000..2a222115d2d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.io.input.ReversedLinesFileReader; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.solr.client.solrj.io.SolrClientCache; +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.stream.*; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.apache.solr.handler.LoggingStream; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.time.Instant; +import java.util.*; + +@LuceneTestCase.SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45"}) +public class UBIComponentDistrQueriesTest extends SolrCloudTestCase { + + private static final String COLLECTIONORALIAS = "collection1"; + private static final int TIMEOUT = DEFAULT_TIMEOUT; + private static final String id = "id"; + + private static boolean useAlias; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(4) + .addConfig( + "conf", TEST_PATH().resolve("configsets").resolve("ubi-enabled").resolve("conf")) + .configure(); + + String collection; + useAlias = random().nextBoolean(); + if (useAlias) { + collection = COLLECTIONORALIAS + "_collection"; + } else { + collection = COLLECTIONORALIAS; + } + + CollectionAdminRequest.createCollection(collection, "conf", 2, 1) + .process(cluster.getSolrClient()); + + cluster.waitForActiveCollection(collection, 2, 2); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish( + collection, cluster.getZkStateReader(), false, true, TIMEOUT); + if (useAlias) { + CollectionAdminRequest.createAlias(COLLECTIONORALIAS, collection) + .process(cluster.getSolrClient()); + } + + // ------------------- + + CollectionAdminRequest.createCollection("ubi_queries", "_default", 1, 1) + .process(cluster.getSolrClient()); + + cluster.waitForActiveCollection("ubi_queries", 1, 1); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish( + "ubi_queries", cluster.getZkStateReader(), false, true, TIMEOUT); + } + + @Before + public void cleanIndex() throws Exception { + new UpdateRequest().deleteByQuery("*:*").commit(cluster.getSolrClient(), COLLECTIONORALIAS); + } + + @Test + public void testUBIQueryStream() throws Exception { + cluster.getSolrClient(COLLECTIONORALIAS).add(List.of(new SolrInputDocument("id", "1", "subject", "aa"), + new SolrInputDocument("id", "two", "subject", "aa"), + new SolrInputDocument("id", "3", "subject", "aa"))); + cluster.getSolrClient(COLLECTIONORALIAS).commit(true, true); + QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(new MapSolrParams(Map.of("q", "aa", "rows", "2", "ubi", "true"))); + System.out.println(queryResponse); + } +} From 39dadf250bd6380b7b603403ac8c21461e5f4122 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Sat, 7 Dec 2024 22:55:06 +0300 Subject: [PATCH 2/5] seems like a right thing --- .../org/apache/solr/handler/component/UBIComponent.java | 7 +++++-- .../solr/configsets/ubi-enabled/conf/schema.xml | 6 +++++- .../handler/component/UBIComponentDistrQueriesTest.java | 9 ++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java index 4279a30e322..68dbea447f6 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java @@ -51,6 +51,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.handler.RequestHandlerBase.isInternalShardRequest; + /** * User Behavior Insights (UBI) is an open standard for gathering query and event data from users * and storing it in a structured format. UBI can be used for in session personalization, implicit @@ -231,8 +233,9 @@ public void process(ResponseBuilder rb) throws IOException { if (!params.getBool(COMPONENT_NAME, false)) { return; } - - doStuff(rb); + if (!isInternalShardRequest(rb.req)) { // subordinate shard req shouldn't yield logs + doStuff(rb); + } } @Override diff --git a/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml b/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml index 661b02a0f96..b080f6b2526 100644 --- a/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml +++ b/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml @@ -19,6 +19,10 @@ - + + + + + id diff --git a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java index 2a222115d2d..1e395a25a8a 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java @@ -101,10 +101,13 @@ public void cleanIndex() throws Exception { @Test public void testUBIQueryStream() throws Exception { cluster.getSolrClient(COLLECTIONORALIAS).add(List.of(new SolrInputDocument("id", "1", "subject", "aa"), - new SolrInputDocument("id", "two", "subject", "aa"), + new SolrInputDocument("id", "2" /*"two"*/, "subject", "aa"), new SolrInputDocument("id", "3", "subject", "aa"))); cluster.getSolrClient(COLLECTIONORALIAS).commit(true, true); - QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(new MapSolrParams(Map.of("q", "aa", "rows", "2", "ubi", "true"))); - System.out.println(queryResponse); + QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(new MapSolrParams( + Map.of("q", "aa", "df","subject", "rows", "2", "ubi", "true" + ))); + System.out.println(queryResponse.getResponse().get("ubi")); + // TODO check that ids were recorded } } From 995a0cfa7b7c9adc1f834888cb2e82bf7847e0e6 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Mon, 9 Dec 2024 00:13:19 +0300 Subject: [PATCH 3/5] now it checks that query were recorded. Not sure about doc ids. --- .../solr/handler/component/SearchComponent.java | 3 ++- .../solr/handler/component/SearchHandler.java | 1 + .../solr/configsets/ubi-enabled/conf/schema.xml | 6 +----- .../component/UBIComponentDistrQueriesTest.java | 16 +++++++++++++--- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java index dc845e1465b..df62b7338e1 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java @@ -121,5 +121,6 @@ public void initializeMetrics(SolrMetricsContext parentContext, String scope) { Map.entry(RealTimeGetComponent.COMPONENT_NAME, RealTimeGetComponent.class), Map.entry(ExpandComponent.COMPONENT_NAME, ExpandComponent.class), Map.entry(TermsComponent.COMPONENT_NAME, TermsComponent.class), - Map.entry(UBIComponent.COMPONENT_NAME, UBIComponent.class)); + Map.entry(UBIComponent.COMPONENT_NAME, UBIComponent.class)// oh r'lly?? esp giving that it receive some expr via init args + ); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 467286452e0..e4b453c5920 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -135,6 +135,7 @@ public class SearchHandler extends RequestHandlerBase protected List getDefaultComponents() { List l = new ArrayList(SearchComponent.STANDARD_COMPONENTS.keySet()); moveToFirst(l, QueryComponent.COMPONENT_NAME); + l.remove(RealTimeGetComponent.COMPONENT_NAME); // pardon. it breaks my essential cloud test. there wasn't it there ever! return l; } diff --git a/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml b/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml index b080f6b2526..661b02a0f96 100644 --- a/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml +++ b/solr/core/src/test-files/solr/configsets/ubi-enabled/conf/schema.xml @@ -19,10 +19,6 @@ - - - - - + id diff --git a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java index 1e395a25a8a..00395be1d76 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java @@ -30,6 +30,7 @@ import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.AbstractDistribZkTestBase; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.cluster.api.SimpleMap; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; @@ -63,7 +64,7 @@ public static void setupCluster() throws Exception { .configure(); String collection; - useAlias = random().nextBoolean(); + useAlias = false; //random().nextBoolean(); if (useAlias) { collection = COLLECTIONORALIAS + "_collection"; } else { @@ -84,7 +85,8 @@ public static void setupCluster() throws Exception { // ------------------- - CollectionAdminRequest.createCollection("ubi_queries", "_default", 1, 1) + CollectionAdminRequest.createCollection("ubi_queries",// it seems like a hardcoded name why? + "_default", 1, 1) .process(cluster.getSolrClient()); cluster.waitForActiveCollection("ubi_queries", 1, 1); @@ -107,7 +109,15 @@ public void testUBIQueryStream() throws Exception { QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(new MapSolrParams( Map.of("q", "aa", "df","subject", "rows", "2", "ubi", "true" ))); - System.out.println(queryResponse.getResponse().get("ubi")); + String qid = (String) ((SimpleMap) queryResponse.getResponse().get("ubi")).get("query_id"); + assertTrue(qid.length()>10); + Thread.sleep(10000); // I know what you think of // TODO check that ids were recorded + QueryResponse queryCheck = cluster.getSolrClient("ubi_queries").query(new MapSolrParams( + Map.of("q", "id:"+qid //doesn't search it why? is it a race? + ))); + // however I can't see doc ids found there. Shouldn't I ? + assertEquals(1L, queryCheck.getResults().getNumFound()); + assertEquals(queryCheck.getResults().get(0).get("id"),qid); } } From 0887cae41b7bb6b33699fd64bdacbfd0b2f901c6 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Tue, 10 Dec 2024 14:26:41 +0300 Subject: [PATCH 4/5] spotless apply --- .../handler/component/SearchComponent.java | 2 +- .../solr/handler/component/UBIComponent.java | 14 ++-- .../UBIComponentDistrQueriesTest.java | 69 +++++++++---------- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java index bdd121a8c97..b7de8aa620a 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java @@ -109,7 +109,7 @@ public void initializeMetrics(SolrMetricsContext parentContext, String scope) { // By default, don't register any metrics - but prepare a child context this.solrMetricsContext = parentContext.getChildContext(this); } - + public static final Map> STANDARD_COMPONENTS; static { diff --git a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java index 0a944221f67..fc5756f24bb 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java @@ -16,6 +16,8 @@ */ package org.apache.solr.handler.component; +import static org.apache.solr.handler.RequestHandlerBase.isInternalShardRequest; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; @@ -63,8 +65,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.handler.RequestHandlerBase.isInternalShardRequest; - /** * User Behavior Insights (UBI) is an open standard for gathering query and event data from users * and storing it in a structured format. UBI can be used for in session personalization, implicit @@ -303,9 +303,9 @@ private static UBIQuery getUbiQuery(ResponseBuilder rb) { ubiQuery.setApplication(params.get(APPLICATION)); if (ubiQuery.getApplication() == null) { ubiQuery.setApplication( - rb.isDistrib - ? rb.req.getCloudDescriptor().getCollectionName() - : searcher.getCore().getName()); + rb.isDistrib + ? rb.req.getCloudDescriptor().getCollectionName() + : searcher.getCore().getName()); } String queryAttributes = params.get(QUERY_ATTRIBUTES); @@ -334,8 +334,8 @@ public void doDistribStuff(ResponseBuilder rb) throws IOException { // the same component run twice? UBIQuery ubiQuery = getUbiQuery(rb); if (ubiQuery == null) return; - //String docIds = extractDocIds(docs, searcher); - String docIds =String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList()); + // String docIds = extractDocIds(docs, searcher); + String docIds = String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList()); ubiQuery.setDocIds(docIds); addUserBehaviorInsightsToResponse(ubiQuery, rb); diff --git a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java index 00395be1d76..c87b8f5c268 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java @@ -16,15 +16,9 @@ */ package org.apache.solr.handler.component; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.io.input.ReversedLinesFileReader; +import java.util.List; +import java.util.Map; import org.apache.lucene.tests.util.LuceneTestCase; -import org.apache.solr.client.solrj.io.SolrClientCache; -import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.stream.*; -import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; -import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser; -import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; @@ -33,20 +27,10 @@ import org.apache.solr.cluster.api.SimpleMap; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.MapSolrParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.core.SolrCore; -import org.apache.solr.embedded.JettySolrRunner; -import org.apache.solr.handler.LoggingStream; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.time.Instant; -import java.util.*; - @LuceneTestCase.SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45"}) public class UBIComponentDistrQueriesTest extends SolrCloudTestCase { @@ -59,12 +43,11 @@ public class UBIComponentDistrQueriesTest extends SolrCloudTestCase { @BeforeClass public static void setupCluster() throws Exception { configureCluster(4) - .addConfig( - "conf", TEST_PATH().resolve("configsets").resolve("ubi-enabled").resolve("conf")) + .addConfig("conf", TEST_PATH().resolve("configsets").resolve("ubi-enabled").resolve("conf")) .configure(); String collection; - useAlias = false; //random().nextBoolean(); + useAlias = false; // random().nextBoolean(); if (useAlias) { collection = COLLECTIONORALIAS + "_collection"; } else { @@ -85,14 +68,17 @@ public static void setupCluster() throws Exception { // ------------------- - CollectionAdminRequest.createCollection("ubi_queries",// it seems like a hardcoded name why? - "_default", 1, 1) - .process(cluster.getSolrClient()); + CollectionAdminRequest.createCollection( + "ubi_queries", // it seems like a hardcoded name why? + "_default", + 1, + 1) + .process(cluster.getSolrClient()); cluster.waitForActiveCollection("ubi_queries", 1, 1); AbstractDistribZkTestBase.waitForRecoveriesToFinish( - "ubi_queries", cluster.getZkStateReader(), false, true, TIMEOUT); + "ubi_queries", cluster.getZkStateReader(), false, true, TIMEOUT); } @Before @@ -102,22 +88,33 @@ public void cleanIndex() throws Exception { @Test public void testUBIQueryStream() throws Exception { - cluster.getSolrClient(COLLECTIONORALIAS).add(List.of(new SolrInputDocument("id", "1", "subject", "aa"), - new SolrInputDocument("id", "2" /*"two"*/, "subject", "aa"), - new SolrInputDocument("id", "3", "subject", "aa"))); + cluster + .getSolrClient(COLLECTIONORALIAS) + .add( + List.of( + new SolrInputDocument("id", "1", "subject", "aa"), + new SolrInputDocument("id", "2" /*"two"*/, "subject", "aa"), + new SolrInputDocument("id", "3", "subject", "aa"))); cluster.getSolrClient(COLLECTIONORALIAS).commit(true, true); - QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(new MapSolrParams( - Map.of("q", "aa", "df","subject", "rows", "2", "ubi", "true" - ))); + QueryResponse queryResponse = + cluster + .getSolrClient(COLLECTIONORALIAS) + .query( + new MapSolrParams(Map.of("q", "aa", "df", "subject", "rows", "2", "ubi", "true"))); String qid = (String) ((SimpleMap) queryResponse.getResponse().get("ubi")).get("query_id"); - assertTrue(qid.length()>10); + assertTrue(qid.length() > 10); Thread.sleep(10000); // I know what you think of // TODO check that ids were recorded - QueryResponse queryCheck = cluster.getSolrClient("ubi_queries").query(new MapSolrParams( - Map.of("q", "id:"+qid //doesn't search it why? is it a race? - ))); + QueryResponse queryCheck = + cluster + .getSolrClient("ubi_queries") + .query( + new MapSolrParams( + Map.of( + "q", "id:" + qid // doesn't search it why? is it a race? + ))); // however I can't see doc ids found there. Shouldn't I ? assertEquals(1L, queryCheck.getResults().getNumFound()); - assertEquals(queryCheck.getResults().get(0).get("id"),qid); + assertEquals(queryCheck.getResults().get(0).get("id"), qid); } } From a44abb6fc014095b50ca43345d72db6e0b28112c Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Tue, 10 Dec 2024 16:46:16 +0300 Subject: [PATCH 5/5] extract method --- .../solr/handler/component/UBIComponent.java | 47 ++++++++----------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java index fc5756f24bb..13c892be4d7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.function.Consumer; import org.apache.solr.client.solrj.io.SolrClientCache; import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.comp.StreamComparator; @@ -247,7 +248,16 @@ public void process(ResponseBuilder rb) throws IOException { return; } if (!isInternalShardRequest(rb.req)) { // subordinate shard req shouldn't yield logs - doStuff(rb); + storeUbiDetails( + rb, + ubiQuery -> { + try { + DocList docList = ((ResultContext) rb.rsp.getResponse()).getDocList(); + ubiQuery.setDocIds(extractDocIds(docList, rb.req.getSearcher())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); } } @@ -263,32 +273,18 @@ public int distributedProcess(ResponseBuilder rb) throws IOException { } if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { - doDistribStuff(rb); + storeUbiDetails( + rb, + ubiQuery -> + ubiQuery.setDocIds( + String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList()))); return ResponseBuilder.STAGE_DONE; } return ResponseBuilder.STAGE_DONE; } - public void doStuff(ResponseBuilder rb) throws IOException { - UBIQuery ubiQuery = getUbiQuery(rb); - if (ubiQuery == null) return; - - ResultContext rc = (ResultContext) rb.rsp.getResponse(); - DocList docs = rc.getDocList(); - // DocList docs = rb.getResults().docList; - - String docIds = extractDocIds(docs, rb.req.getSearcher()); - - ubiQuery.setDocIds(docIds); - - addUserBehaviorInsightsToResponse(ubiQuery, rb); - recordQuery(ubiQuery); - } - private static UBIQuery getUbiQuery(ResponseBuilder rb) { - // not sure why but sometimes we get it tw(o)ice... how can a response have the - // the same component run twice? if (rb.rsp.getValues().get("ubi") != null) { return null; } @@ -328,16 +324,11 @@ private static UBIQuery getUbiQuery(ResponseBuilder rb) { return ubiQuery; } - public void doDistribStuff(ResponseBuilder rb) throws IOException { - - // not sure why but sometimes we get it tw(o)ice... how can a response have the - // the same component run twice? + private void storeUbiDetails(ResponseBuilder rb, Consumer docIdsSetter) + throws IOException { UBIQuery ubiQuery = getUbiQuery(rb); if (ubiQuery == null) return; - // String docIds = extractDocIds(docs, searcher); - String docIds = String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList()); - ubiQuery.setDocIds(docIds); - + docIdsSetter.accept(ubiQuery); addUserBehaviorInsightsToResponse(ubiQuery, rb); recordQuery(ubiQuery); }