Skip to content

Commit

Permalink
Harvest full metadata from ArcGIS Server services and layers
Browse files Browse the repository at this point in the history
Added code to retrieve the full metadata XML from ArcGIS Server services and layers, as opposed to relying only on the item description.
  • Loading branch information
mhogeweg committed Sep 13, 2023
1 parent e0d2b0f commit 57ab8c8
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,19 @@ public ServerResponse readServiceInformation(URL url) throws IOException {
response.url = url.toExternalForm();
response.json = responseContent;
response.itemInfo = readItemInfo(new URL(url + "/info/itemInfo"));

response.hasMetadata = false;
response.metadataXML = "";
String metadataURL = url + "/info/metadata";
HttpGet getXML = new HttpGet(metadataURL);
try (CloseableHttpResponse httpResponseXML = httpClient.execute(getXML); InputStream contentStreamXML = httpResponseXML.getEntity().getContent();) {
if (httpResponseXML.getStatusLine().getStatusCode()<400) {
String responseContentXML = IOUtils.toString(contentStreamXML, "UTF-8");
response.metadataXML = responseContentXML;
response.hasMetadata = true;
}
}

return response;
}
}
Expand All @@ -201,6 +214,7 @@ public ItemInfo readItemInfo(URL url) throws IOException {
mapper.configure(Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
ItemInfo response = mapper.readValue(responseContent, ItemInfo.class);

return response;
}
}
Expand Down Expand Up @@ -228,6 +242,17 @@ public LayerInfo readLayerInformation(String folder, ServiceInfo si, LayerRef lR
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
LayerInfo response = mapper.readValue(responseContent, LayerInfo.class);
if (response.hasMetadata) {
HttpGet getXML = new HttpGet(url + String.format("/metadata", "text/xml"));
try (CloseableHttpResponse httpResponseXML = httpClient.execute(getXML); InputStream contentStreamXML = httpResponseXML.getEntity().getContent();) {
if (httpResponseXML.getStatusLine().getStatusCode()>=400) {
throw new HttpResponseException(httpResponseXML.getStatusLine().getStatusCode(), httpResponseXML.getStatusLine().getReasonPhrase());
}
String responseContentXML = IOUtils.toString(contentStreamXML, "UTF-8");
response.metadataXML = responseContentXML;
}

}
response.url = url;
response.json = responseContent;
return response;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,6 @@ public final class ItemInfo {
public String spatialReference;
public String accessInformation;
public String licenseInfo;
public boolean hasMetadata;
public String metadataXML;
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ public class LayerInfo {
public String description;
public ExtentInfo extent;
public String json;
public boolean hasMetadata;
public String metadataXML;
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
public final class ServerResponse {
public String url;
public String json;
public boolean hasMetadata;
public String metadataXML;

public String mapName;
public String serviceDescription;
Expand All @@ -36,6 +38,6 @@ public final class ServerResponse {

@Override
public String toString() {
return String.format("{ \"mapName\": \"%s\", \"serviceDescription\": \"%s\", \"spatialReference\": %s, \"initialExtent\": %s, \"fullExtent\": %s}", mapName, serviceDescription, spatialReference, initialExtent, fullExtent);
return String.format("{ \"mapName\": \"%s\", \"serviceDescription\": \"%s\", \"spatialReference\": %s, \"initialExtent\": %s, \"fullExtent\": %s, \"metadata\": %s}", mapName, serviceDescription, spatialReference, initialExtent, fullExtent, metadataXML);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ public final class WKAConstants {
public static final String WKA_BBOX = "bbox";
public static final String WKA_MODIFIED = "modified";
public static final String WKA_REFERENCES = "references";
public static final String WKA_METADATA_XML = "metadataXML";

private static final Set<String> all = new HashSet(Arrays.asList(new String[]{
WKA_IDENTIFIER, WKA_TITLE, WKA_DESCRIPTION, WKA_RESOURCE_URL,
WKA_RESOURCE_URL_SCHEME, WKA_BBOX, WKA_THUMBNAIL_URL, WKA_MODIFIED, WKA_REFERENCES
WKA_RESOURCE_URL_SCHEME, WKA_BBOX, WKA_THUMBNAIL_URL, WKA_MODIFIED, WKA_REFERENCES,
WKA_METADATA_XML
}));

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,22 @@
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import com.esri.geoportal.commons.utils.XmlUtils;
import com.esri.geoportal.geoportal.commons.geometry.GeometryService;
import com.esri.geoportal.harvester.api.DataContent;
Expand All @@ -69,10 +77,15 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.StringReader;
import java.net.URL;
import java.util.Arrays;
import java.util.stream.Collectors;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
* Ags broker.
Expand Down Expand Up @@ -171,6 +184,7 @@ private ServerResponse layerInfoToServerResponse(LayerInfo layerInfo) {
response.description = layerInfo.description;
response.fullExtent = layerInfo.extent;
response.initialExtent = layerInfo.extent;
response.metadataXML = layerInfo.metadataXML;
return response;
}

Expand Down Expand Up @@ -253,13 +267,15 @@ private DataReference createReference(ServerResponse serverResponse) throws IOEx
String itemInfoDescription = trimHtml(serverResponse.itemInfo!=null? serverResponse.itemInfo.description: null);
String serverDescription = trimHtml(StringUtils.defaultString(StringUtils.defaultIfBlank(serverResponse.description, serverResponse.serviceDescription)));
String description = StringUtils.defaultIfBlank(itemInfoDescription, serverDescription);
String metadataXML = serverResponse.metadataXML!=null? serverResponse.metadataXML: null;

HashMap<String, Attribute> attributes = new HashMap<>();
attributes.put(WKAConstants.WKA_IDENTIFIER, new StringAttribute(serverResponse.url));
attributes.put(WKAConstants.WKA_TITLE, new StringAttribute(title));
attributes.put(WKAConstants.WKA_DESCRIPTION, new StringAttribute(description));
attributes.put(WKAConstants.WKA_RESOURCE_URL, new StringAttribute(serverResponse.url));
attributes.put(WKAConstants.WKA_RESOURCE_URL_SCHEME, new StringAttribute("urn:x-esri:specification:ServiceType:ArcGIS:" + (serviceType != null ? serviceType : "Unknown")));
attributes.put(WKAConstants.WKA_METADATA_XML, new StringAttribute(metadataXML));

if (serverResponse.fullExtent != null) {
normalizeExtent(serverResponse.fullExtent, 4326);
Expand All @@ -269,10 +285,85 @@ private DataReference createReference(ServerResponse serverResponse) throws IOEx
}
}

MapAttribute attrs = new MapAttribute(attributes);
Document document = metaBuilder.create(attrs);
byte[] bytes = XmlUtils.toString(document).getBytes("UTF-8");
Document document = null;
byte[] bytes = null;

if (metadataXML != null && !metadataXML.trim().isEmpty()) {
bytes = metadataXML.getBytes("UTF-8");

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(false);
DocumentBuilder builder = null;
try {
builder = factory.newDocumentBuilder();
document = builder.parse(new InputSource(new StringReader(metadataXML)));

XPath xpath = XPathFactory.newInstance().newXPath();
NodeList thumbnailNodes = (NodeList) xpath.compile("/metadata/Binary/Thumbnail/Data").evaluate(document, XPathConstants.NODESET);
if (thumbnailNodes.getLength() > 0) {
LOG.debug(thumbnailNodes.item(0).getTextContent());
}

NodeList serviceUrlNodes = (NodeList) xpath.compile("/metadata/distInfo/distributor/distorTran/onLineSrc/linkage").evaluate(document, XPathConstants.NODESET);
if (serviceUrlNodes.getLength() > 0) {
serviceUrlNodes.item(0).setNodeValue(serverResponse.url);
} else {
// no onLineSrc/linkage nodes

// get /metadata
NodeList metadataNodes = (NodeList) xpath.compile("/metadata").evaluate(document, XPathConstants.NODESET);

// get /metadata/distInfo
NodeList distinfoNodes = (NodeList) xpath.compile("/metadata/distinfo").evaluate(document, XPathConstants.NODESET);
if (distinfoNodes.getLength() == 0) {
Element distinfoElement = document.createElement("distinfo");
metadataNodes.item(0).appendChild(distinfoElement);
}

// get /metadata/distInfo/distributor
NodeList distributorNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor").evaluate(document, XPathConstants.NODESET);
if (distributorNodes.getLength() == 0) {
distinfoNodes = (NodeList) xpath.compile("/metadata/distinfo").evaluate(document, XPathConstants.NODESET);
Element distributorElement = document.createElement("distributor");
distinfoNodes.item(0).appendChild(distributorElement);
}

// get /metadata/distInfo/distributor/distorTran
NodeList distorTranNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor/distorTran").evaluate(document, XPathConstants.NODESET);
if (distorTranNodes.getLength() == 0) {
distributorNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor").evaluate(document, XPathConstants.NODESET);
Element distorTranElement = document.createElement("distorTran");
distributorNodes.item(0).appendChild(distorTranElement);
}

// get /metadata/distInfo/distributor/distorTran/onLineSrc
NodeList onLineSrcNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor/distorTran/onLineSrc").evaluate(document, XPathConstants.NODESET);
if (onLineSrcNodes.getLength() == 0) {
distorTranNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor/distorTran").evaluate(document, XPathConstants.NODESET);
Element onLineSrcElement = document.createElement("onLineSrc");
distorTranNodes.item(0).appendChild(onLineSrcElement);
}

// get /metadata/distInfo/distributor/distorTran/onLineSrc/linkage
NodeList linkageNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor/distorTran/onLineSrc/linkage").evaluate(document, XPathConstants.NODESET);
if (linkageNodes.getLength() == 0) {
onLineSrcNodes = (NodeList) xpath.compile("/metadata/distinfo/distributor/distorTran/onLineSrc").evaluate(document, XPathConstants.NODESET);
Element linkageElement = document.createElement("linkage");
linkageElement.setTextContent(serverResponse.url);
onLineSrcNodes.item(0).appendChild(linkageElement);
}
}

} catch (Exception ex) {
LOG.error(String.format("Error geting XML document. "), ex);

}
} else {
MapAttribute attrs = new MapAttribute(attributes);
document = metaBuilder.create(attrs);
}
bytes = XmlUtils.toString(document).getBytes("UTF-8");

SimpleDataReference ref = new SimpleDataReference(getBrokerUri(), getEntityDefinition().getLabel(), serverResponse.url, null, URI.create(serverResponse.url), td.getSource().getRef(), td.getRef());
attributes.entrySet().forEach(entry -> {
ref.getAttributesMap().put(entry.getKey(), entry.getValue());
Expand Down

0 comments on commit 57ab8c8

Please sign in to comment.