From a51a5a364af49b0f79bef6e2b1e1d64f35d26e3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Fri, 12 Apr 2024 07:54:53 +0200 Subject: [PATCH] CSW server / Use portal filter in GetRecordById requests (#7890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CSW server / Use portal filter in GetRecordById requests. Fixes #7443 * CSW server / Use portal filter in GetRecordById requests / Only check total. (#100) --------- Co-authored-by: François Prunayre --- .../geonet/component/csw/GetRecordById.java | 24 +-- .../services/getrecords/SearchController.java | 201 ++++++++++-------- 2 files changed, 126 insertions(+), 99 deletions(-) diff --git a/csw-server/src/main/java/org/fao/geonet/component/csw/GetRecordById.java b/csw-server/src/main/java/org/fao/geonet/component/csw/GetRecordById.java index 43d7f725f7..078fd59cbc 100644 --- a/csw-server/src/main/java/org/fao/geonet/component/csw/GetRecordById.java +++ b/csw-server/src/main/java/org/fao/geonet/component/csw/GetRecordById.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== @@ -30,14 +30,9 @@ import jeeves.server.context.ServiceContext; -import org.apache.commons.lang.NotImplementedException; import org.fao.geonet.kernel.SchemaManager; -import org.fao.geonet.kernel.setting.SettingInfo; -import org.fao.geonet.utils.Log; import org.fao.geonet.Util; -import org.fao.geonet.utils.Xml; -import org.apache.commons.lang.StringUtils; import org.fao.geonet.GeonetContext; import org.fao.geonet.constants.Geonet; import org.fao.geonet.csw.common.Csw; @@ -54,7 +49,6 @@ import org.fao.geonet.kernel.csw.CatalogService; import 
org.fao.geonet.kernel.csw.services.AbstractOperation; import org.fao.geonet.kernel.csw.services.getrecords.SearchController; -import org.fao.geonet.domain.Pair; import org.fao.geonet.lib.Lib; import org.jdom.Element; import org.springframework.beans.factory.annotation.Autowired; @@ -76,12 +70,14 @@ public class GetRecordById extends AbstractOperation implements CatalogService { //--------------------------------------------------------------------------- static final String NAME = "GetRecordById"; - private SearchController _searchController; + + @Autowired + private SearchController searchController; @Autowired - private CatalogConfiguration _catalogConfig; + private CatalogConfiguration catalogConfig; @Autowired - private SchemaManager _schemaManager; + private SchemaManager schemaManager; @Autowired public GetRecordById(ApplicationContext applicationContext) { @@ -106,7 +102,7 @@ public Element execute(Element request, ServiceContext context) throws CatalogEx checkVersion(request); //-- Added for CSW 2.0.2 compliance by warnock@awcubed.com checkOutputFormat(request); - String outSchema = OutputSchema.parse(request.getAttributeValue("outputSchema"), _schemaManager); + String outSchema = OutputSchema.parse(request.getAttributeValue("outputSchema"), schemaManager); //-------------------------------------------------------- ElementSetName setName = getElementSetName(request, ElementSetName.SUMMARY); @@ -135,8 +131,8 @@ public Element execute(Element request, ServiceContext context) throws CatalogEx Lib.resource.checkPrivilege(context, id, ReservedOperation.view); final String displayLanguage = context.getLanguage(); - Element md = SearchController.retrieveMetadata(context, id, setName, outSchema, null, null, ResultType.RESULTS, null, - displayLanguage); + Element md = searchController.retrieveMetadata(context, id, setName, outSchema, null, null, ResultType.RESULTS,null, + displayLanguage, true); if (md != null) { final Map transformers = 
context.getApplicationContext() @@ -150,7 +146,7 @@ public Element execute(Element request, ServiceContext context) throws CatalogEx response.addContent(md); - if (_catalogConfig.isIncreasePopularity()) { + if (catalogConfig.isIncreasePopularity()) { gc.getBean(DataManager.class).increasePopularity(context, id); } } diff --git a/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/SearchController.java b/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/SearchController.java index 1c352b3b2a..d6d9e8283a 100644 --- a/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/SearchController.java +++ b/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/SearchController.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== @@ -57,18 +57,11 @@ import org.jdom.Content; import org.jdom.Element; import org.jdom.Namespace; -import org.geotools.api.filter.Filter; -import org.geotools.api.filter.capability.FilterCapabilities; import org.springframework.beans.factory.annotation.Autowired; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; public class SearchController { @@ -98,21 +91,58 @@ public class SearchController { * Retrieves metadata from the database. Conversion between metadata record and output schema * are defined in xml/csw/schemas/ directory. 
* - * @param context service context - * @param id id of metadata - * @param setName requested ElementSetName - * @param outSchema requested OutputSchema - * @param elemNames requested ElementNames - * @param typeName requested typeName - * @param resultType requested ResultType - * @param strategy ElementNames strategy - * @throws CatalogException hmm + * @param context service context + * @param id id of metadata + * @param setName requested ElementSetName + * @param outSchema requested OutputSchema + * @param elemNames requested ElementNames + * @param typeName requested typeName + * @param resultType requested ResultType + * @param strategy ElementNames strategy + * @param checkMetadataAvailableInPortal Checks if the metadata can be retrieved in the portal. + * Used in GetRecordById. GetRecords does a query with this check already. * @return The XML metadata record if the record could be converted to the required output * schema. Null if no conversion available for the schema (eg. fgdc record can not be converted * to ISO). 
+ * @throws CatalogException hmm */ - public static Element retrieveMetadata(ServiceContext context, String id, ElementSetName setName, String - outSchema, Set elemNames, String typeName, ResultType resultType, String strategy, String displayLanguage) throws CatalogException { + public Element retrieveMetadata(ServiceContext context, String id, ElementSetName setName, String + outSchema, Set elemNames, String typeName, ResultType resultType, String strategy, String displayLanguage, + boolean checkMetadataAvailableInPortal) throws CatalogException { + + if (checkMetadataAvailableInPortal) { + // Check if the metadata is available in the portal + String elasticSearchQuery = "{ \"bool\": {\n" + + " \"must\": [\n" + + " {" + + " \"term\": {" + + " \"id\": {" + + " \"value\": \"%s\"" + + " }" + + " }" + + " } " + + " ]\n" + + " ,\"filter\":{\"query_string\":{\"query\":\"%s\"}}}}"; + + JsonNode esJsonQuery; + + try { + String filterQueryString = esFilterBuilder.build(context, "metadata", false, node); + String jsonQuery = String.format(elasticSearchQuery, id, filterQueryString); + + ObjectMapper objectMapper = new ObjectMapper(); + esJsonQuery = objectMapper.readTree(jsonQuery); + + TotalHits total = searchManager.query(esJsonQuery, new HashSet<>(), 0, 0).hits().total(); + + if (Optional.ofNullable(total).map(TotalHits::value).orElse(0L) == 0) { + return null; + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + } try { //--- get metadata from DB @@ -151,12 +181,12 @@ public static Element retrieveMetadata(ServiceContext context, String id, Elemen res = applyElementNames(context, elemNames, typeName, scm, schema, res, resultType, info, strategy); - if(Log.isDebugEnabled(Geonet.CSW_SEARCH)) + if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) Log.debug(Geonet.CSW_SEARCH, "SearchController:retrieveMetadata: before applying postprocessing on metadata Element for id " + id); res = applyPostProcessing(context, scm, schema, res, outSchema, setName, resultType, id, 
displayLanguage); - if(Log.isDebugEnabled(Geonet.CSW_SEARCH)) + if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) Log.debug(Geonet.CSW_SEARCH, "SearchController:retrieveMetadata: All processing is complete on metadata Element for id " + id); if (res != null) { @@ -178,39 +208,39 @@ public static Element retrieveMetadata(ServiceContext context, String id, Elemen /** * Applies requested ElementNames and typeNames. - * + * <p>

* For ElementNames, several strategies are implemented. Clients can determine the behaviour by * sending attribute "elementname_strategy" with one of the following values: - * + * <p>

* csw202 relaxed context geonetwork26 - * + * <p>

* The default is 'relaxed'. The strategies cause the following behaviour: - * + * <p>

* csw202 -- compliant to the CSW2.0.2 specification. In particular this means that complete * metadata are returned that match the requested ElementNames, only if they are valid for their * XSD. This is because GeoNetwork only supports OutputFormat=application/xml, which mandates * that valid documents are returned. Because possibly not many of the catalog's metadata are * valid, this is not the default. - * + * <p>

* relaxed -- like csw202, but dropped the requirement to only include valid metadata. So this * returns complete metadata that match the requested ElementNames. This is the default * strategy. - * + * <p>

* context -- does not return complete metadata but only the elements matching the request, in * their context (i.e. all ancestor elements up to the root of the document are retained). This * strategy is similar to geonetwork26 but the context allows clients to determine which of the * elements returned corresponds to which of the elements requested (in case they have the same * name). - * + * <p>

* geonetwork26 -- behaviour as in GeoNetwork 2.6. Just return the requested elements, stripped * of any context. This can make it impossible for the client to determine which of the elements * returned corresponds to which of the elements requested; for example if the client asks for * gmd:title, the response may contain various gmd:title elements taken from different locations * in the metadata document. - * + * <p>

* ------------------------------------------------- Relevant sections of specification about * typeNames: - * + * <p>

* OGC 07-006 10.8.4.8: The typeNames parameter is a list of one or more names of queryable * entities in the catalogue's information model that may be constrained in the predicate of the * query. In the case of XML realization of the OGC core metadata properties (Subclause 10.2.5), @@ -221,16 +251,16 @@ public static Element retrieveMetadata(ServiceContext context, String id, Elemen * addition, all or some of the these queryable entity names may be specified in the query to * define which metadata record elements the query should present in the response to the * GetRecords operation. - * + * <p>

* OGC 07-045: - * + * <p>

* 8.2.2.1.1 Request (GetRecords) TypeNames. Must support *one* of “csw:Record” or * “gmd:MD_Metadata” in a query. Default value is “csw:Record”. - * + * <p>

* So, in OGC 07-045, exactly one of csw:Record or gmd:MD_Metadata is mandated for typeName. - * + * <p>

* ---------------------------------- Relevant specs about ElementNames: - * + * <p>

* OGC 07-006 10.8.4.9: The ElementName parameter is used to specify one or more metadata record * elements, from the output schema specified using the outputSchema parameter, that the query * shall present in the response to the a GetRecords operation. Since clause 10.2.5 realizes the @@ -238,19 +268,19 @@ public static Element retrieveMetadata(ServiceContext context, String id, Elemen * XPath expression perhaps using qualified names. In the general case, a complete XPath * expression may be required to correctly reference an element in the information model of the * catalog. - * + * <p>

* However, in the case where the typeNames attribute on the Query element contains a single * value, the catalogue can infer the first step in the path expression and it can be omitted. * This is usually the case when querying the core metadata properties since the only queryable * target is csw:Record. - * + * <p>

* If the metadata record element names are not from the schema specified using the outputSchema * parameter, then the service shall raise an exception as described in Subclause 10.3.7. - * + * <p>

* OGC 07-045: Usage of the ELEMENTNAME is not further specified here. - * + * <p>

* ---------------------------------- Relevant specs about outputFormat: - * + * <p>

* OGC 07-006 10.8.4.4 outputFormat parameter: In the case where the output format is * application/xml, the CSW shall generate an XML document that validates against a schema * document that is specified in the output document via the xsi:schemaLocation attribute @@ -290,7 +320,7 @@ private static Element applyElementNames(ServiceContext context, Set ele } boolean metadataContainsAllRequestedElementNames = true; - List nodes = new ArrayList(); + List nodes = new ArrayList<>(); for (String elementName : elementNames) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) Log.debug(Geonet.CSW_SEARCH, "SearchController dealing with elementName: " + elementName); @@ -343,7 +373,7 @@ private static Element applyElementNames(ServiceContext context, Set ele Log.debug(Geonet.CSW_SEARCH, "strategy is context, constructing context to root"); } - List elementsInContextMatching = new ArrayList(); + List elementsInContextMatching = new ArrayList<>(); for (Element match : elementsMatching) { Element parent = match.getParentElement(); while (parent != null) { @@ -371,7 +401,7 @@ private static Element applyElementNames(ServiceContext context, Set ele } } - if (metadataContainsAllRequestedElementNames == true) { + if (metadataContainsAllRequestedElementNames) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) Log.debug(Geonet.CSW_SEARCH, "metadata containa all requested elementnames: included in response"); @@ -415,27 +445,27 @@ private static Element applyElementNames(ServiceContext context, Set ele /** * TODO improve description of method. Performs the general search tasks. 
* - * @param context Service context - * @param startPos start position (if paged) - * @param maxRecords max records to return - * @param resultType requested ResultType - * @param outSchema requested OutputSchema - * @param setName requested ElementSetName - * @param filterExpr requested FilterExpression - * @param filterVersion requested Filter version - * @param sort requested sorting - * @param elemNames requested ElementNames - * @param typeName requested typeName - * @param maxHitsFromSummary ? - * @param strategy ElementNames strategy + * @param context Service context + * @param startPos start position (if paged) + * @param maxRecords max records to return + * @param resultType requested ResultType + * @param outSchema requested OutputSchema + * @param setName requested ElementSetName + * @param filterExpr requested FilterExpression + * @param filterVersion requested Filter version + * @param sort requested sorting + * @param elemNames requested ElementNames + * @param typeName requested typeName + * @param maxHitsFromSummary ? 
+ * @param strategy ElementNames strategy * @return result * @throws CatalogException hmm */ - public Element search(ServiceContext context, int startPos, int maxRecords, - ResultType resultType, String outSchema, ElementSetName setName, - Element filterExpr, String filterVersion, List sort, - Set elemNames, String typeName, int maxHitsFromSummary, - String strategy) throws CatalogException { + public Element search(ServiceContext context, int startPos, int maxRecords, + ResultType resultType, String outSchema, ElementSetName setName, + Element filterExpr, String filterVersion, List sort, + Set elemNames, String typeName, int maxHitsFromSummary, + String strategy) throws CatalogException { String elasticSearchQuery = convertCswFilterToEsQuery(filterExpr, filterVersion); @@ -456,7 +486,7 @@ public Element search(ServiceContext context, int startPos, int maxRecords, // TODO: Check to get summary or remove custom summary output try { - SearchResponse result = searchManager.query(esJsonQuery, new HashSet<>(), startPos-1, maxRecords, sort); + SearchResponse result = searchManager.query(esJsonQuery, new HashSet<>(), startPos - 1, maxRecords, sort); List hits = result.hits().hits(); @@ -467,21 +497,22 @@ public Element search(ServiceContext context, int startPos, int maxRecords, throw new InvalidParameterValueEx("startPosition", String.format( "Start position (%d) can't be greater than number of matching records (%d for current search).", startPos, numMatches - )); + )); } int counter = 0; ObjectMapper objectMapper = new ObjectMapper(); - for(Hit hit : hits) { - int mdId = Integer.parseInt((String) objectMapper.convertValue(hit.source(), Map.class).get("id")); + for (Hit hit : hits) { + int mdId = Integer.parseInt((String) objectMapper.convertValue(hit.source(), Map.class).get("id")); AbstractMetadata metadata = metadataUtils.findOne(mdId); String displayLanguage = context.getLanguage(); + // The query to retrieve GetRecords, filters by portal. 
No need to re-check again when retrieving each metadata. Element resultMD = retrieveMetadata(context, metadata.getId() + "", - setName, outSchema, elemNames, typeName, resultType, strategy, displayLanguage); + setName, outSchema, elemNames, typeName, resultType, strategy, displayLanguage, false); if (resultMD != null) { if (resultType == ResultType.RESULTS) { @@ -528,8 +559,8 @@ public Element search(ServiceContext context, int startPos, int maxRecords, * @throws InvalidParameterValueEx hmm */ public Element applyElementSetName(ServiceContext context, SchemaManager schemaManager, String schema, - Element result, String outputSchema, ElementSetName elementSetName, - ResultType resultType, String id, String displayLanguage) throws InvalidParameterValueEx { + Element result, String outputSchema, ElementSetName elementSetName, + ResultType resultType, String id, String displayLanguage) throws InvalidParameterValueEx { Path schemaDir = schemaManager.getSchemaCSWPresentDir(schema); Path styleSheet = schemaDir.resolve(outputSchema + "-" + elementSetName + ".xsl"); @@ -554,33 +585,33 @@ public Element applyElementSetName(ServiceContext context, SchemaManager schemaM } } - private String convertCswFilterToEsQuery(Element xml, String filterVersion) { + private String convertCswFilterToEsQuery(Element xml, String filterVersion) { return CswFilter2Es.translate(FilterParser.parseFilter(xml, filterVersion), fieldMapper); } /** * Applies postprocessing stylesheet if available. - * + *

* Postprocessing files should be in the present/csw folder of the schema and have this naming: - * + * <p>

* For default CSW service - * + * <p>

* 1) gmd-csw-postprocessing.xsl : Postprocessing xsl applied for CSW service when requesting iso (gmd) output * 2) csw-csw-postprocessing.xsl : Postprocessing xsl applied for CSW service when requesting ogc (csw) output - * + * <p>

* For a custom sub-portal named inspire - * + * <p>

* 1) gmd-inspire-postprocessing.xsl : Postprocessing xsl applied for custom inspire sub-portal when requesting iso output * 2) csw-inspire-postprocessing.xsl : Postprocessing xsl applied for custom inspire sub-portal when requesting ogc (csw) output * - * @param context Service context - * @param schemaManager schemamanager - * @param schema schema - * @param result result - * @param outputSchema requested OutputSchema - * @param elementSetName requested ElementSetName - * @param resultType requested ResultTYpe - * @param id metadata id + * @param context Service context + * @param schemaManager schemamanager + * @param schema schema + * @param result result + * @param outputSchema requested OutputSchema + * @param elementSetName requested ElementSetName + * @param resultType requested ResultTYpe + * @param id metadata id * @param displayLanguage language to use in response * @return metadata * @throws InvalidParameterValueEx hmm @@ -588,7 +619,7 @@ private String convertCswFilterToEsQuery(Element xml, String filterVersion) { private static Element applyPostProcessing(ServiceContext context, SchemaManager schemaManager, String schema, Element result, String outputSchema, ElementSetName elementSetName, ResultType resultType, String id, String displayLanguage) throws InvalidParameterValueEx { - Path schemaDir = schemaManager.getSchemaCSWPresentDir(schema); + Path schemaDir = schemaManager.getSchemaCSWPresentDir(schema); final NodeInfo nodeInfo = ApplicationContextHolder.get().getBean(NodeInfo.class); @@ -597,7 +628,7 @@ private static Element applyPostProcessing(ServiceContext context, SchemaManager + "-postprocessing.xsl"); if (Files.exists(styleSheet)) { - Map params = new HashMap(); + Map params = new HashMap<>(); params.put("lang", displayLanguage); try {