Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switchover to using Registry API for generating context products json #1036

Merged
merged 7 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,6 @@
<artifactId>log4j-core</artifactId>
<version>2.24.1</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>9.6.1</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
Expand Down
220 changes: 122 additions & 98 deletions src/main/java/gov/nasa/pds/validate/ValidateLauncher.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
Expand All @@ -54,6 +56,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TimeZone;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand All @@ -72,18 +75,12 @@
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.ls.LSInput;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
Expand Down Expand Up @@ -225,7 +222,7 @@ public class ValidateLauncher {
private int everyN;

private boolean contextMismatchAsWarn = true;

private String pdfErrorDir;

private int spotCheckData;
Expand Down Expand Up @@ -509,57 +506,70 @@ else if (Flag.MODEL.getShortName().equals(o.getOpt())) {
}
}

@SuppressWarnings("unchecked")
private void getLatestJsonContext() {

String url = ToolInfo.getSearchURL();
final String searchAfterParam = "search-after";
final int pageSize = 1000;
final String searchAfterKey = "ops:Harvest_Info.ops:harvest_date_time";
List<ValidationProblem> pList = new ArrayList<>();
ObjectMapper mapper = new ObjectMapper();
String base = ToolInfo.getSearchURL();
String endpoint = ToolInfo.getEndpoint();
String query = ToolInfo.getQuery();

SolrClient client = new Http2SolrClient.Builder(url).build();
SolrQuery solrQuery = new SolrQuery(query);
solrQuery.setRequestHandler("/" + endpoint);
solrQuery.setStart(0);
solrQuery.setParam("fl",
"identifier, " + "version_id, " + "data_product_type, " + "target_name, "
+ "instrument_name, " + "instrument_host_name, " + "resource_name, "
+ "investigation_name, " + "target_type, " + "instrument_type, "
+ "instrument_host_type, " + "resource_type, " + "investigation_type, "
+ "facility_name, facility_type, airborne_name, airborne_type");

QueryResponse resp;
List<ValidationProblem> pList = new ArrayList<>();
URL url = null;
Scanner reader = null;
String searchAfter = "";
try {
resp = client.query(solrQuery);
SolrDocumentList res = resp.getResults();
solrQuery.setRows((int) res.getNumFound());
resp = client.query(solrQuery);
res = resp.getResults();
parseJsonObjectWriteTofile(res);

client.close();
ValidationProblem p1 =
new ValidationProblem(new ProblemDefinition(ExceptionType.INFO, ProblemType.GENERAL_INFO,
"Successfully updated registered context products config file. "), new URL(url));
int total = 0;
List<Map<String, Object>> contexts = new ArrayList<Map<String, Object>>();
do {
url = new URL(base + "/" + endpoint + "?limit=" + Integer.toString(pageSize) + "&q="
+ URLEncoder.encode(query, StandardCharsets.UTF_8) + "&sort=" + searchAfterKey + "&"
+ searchAfter);
LOG.debug("Query URL: " + url.toString());
reader = new Scanner(url.openStream()).useDelimiter("\\Z");
StringBuffer buffer = new StringBuffer();
while (reader.hasNext()) {
buffer.append(reader.next());
}
Map<String, Object> response = mapper.readValue(buffer.toString(), HashMap.class);
total = (Integer) ((Map<String, Object>) response.get("summary")).get("hits");
List<Map<String, Object>> dataDocuments = (List<Map<String, Object>>) response.get("data");

contexts.addAll(dataDocuments);
String searchAfterValue =
getSearchAfterFromDocument(dataDocuments.get(dataDocuments.size() - 1), searchAfterKey);

searchAfter =
searchAfterParam + "=" + URLEncoder.encode(searchAfterValue, StandardCharsets.UTF_8);
} while (contexts.size() < total);
parseJsonObjectWriteTofile(contexts, registeredProductsFile.getAbsolutePath());

ValidationProblem p1 = new ValidationProblem(
new ProblemDefinition(ExceptionType.INFO, ProblemType.GENERAL_INFO,
"Successfully updated registered context products config file from PDS Search API."),
registeredProductsFile.toURI().toURL());
pList.add(p1);
ValidationProblem p2 =
new ValidationProblem(new ProblemDefinition(ExceptionType.INFO, ProblemType.GENERAL_INFO,
res.size() + " registered context products found."), new URL(url));
ValidationProblem p2 = new ValidationProblem(
new ProblemDefinition(ExceptionType.INFO, ProblemType.GENERAL_INFO,
contexts.size() + " registered context products found."),
registeredProductsFile.toURI().toURL());
pList.add(p2);

} catch (SolrServerException | IOException ex) {
} catch (IOException ex) {
try {
ValidationProblem p = new ValidationProblem(new ProblemDefinition(ExceptionType.ERROR,
ProblemType.INTERNAL_ERROR,
"Error connecting to Registry to update registered context products config file. Verify internet connection and try again."),
new URL(url));
report.record(new URI(
System.getProperty("resources.home") + File.separator + ToolInfo.getOutputFileName()),
p);
registeredProductsFile.toURI().toURL());
report.record(registeredProductsFile.toURI(), p);
ex.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
reader.close();
}

try {
Expand All @@ -572,61 +582,63 @@ private void getLatestJsonContext() {
}
}

private void parseJsonObjectWriteTofile(SolrDocumentList docs) {
/**
 * Extracts the value used to request the next page of results from a document returned by the
 * registry search endpoint.
 *
 * <p>The document is expected to carry a {@code "properties"} map whose entry for
 * {@code searchAfterKey} is a non-empty list; the first element of that list is returned.
 *
 * @param document one entry of the response's {@code data} array
 * @param searchAfterKey name of the property the query was sorted on
 * @return the first value of {@code searchAfterKey} in the document's properties
 * @throws IllegalStateException if the document has no {@code "properties"} map, or the sort key
 *         is absent, empty, or its first value is not a string
 */
private String getSearchAfterFromDocument(Map<String, Object> document, String searchAfterKey) {
  // instanceof checks replace the unchecked casts so a malformed response fails with a
  // message naming the missing piece instead of a bare NPE/ClassCastException.
  Object properties = document.get("properties");
  if (!(properties instanceof Map)) {
    throw new IllegalStateException(
        "Registry document has no \"properties\" object; cannot read " + searchAfterKey);
  }
  Object values = ((Map<?, ?>) properties).get(searchAfterKey);
  if (!(values instanceof List) || ((List<?>) values).isEmpty()) {
    throw new IllegalStateException(
        "Registry document has no value for sort key " + searchAfterKey);
  }
  Object first = ((List<?>) values).get(0);
  if (!(first instanceof String)) {
    throw new IllegalStateException(
        "Registry document value for sort key " + searchAfterKey + " is not a string");
  }
  return (String) first;
}

private void parseJsonObjectWriteTofile(List<Map<String, Object>> documents,
String contextJsonFilePath) {
final List<String> empty = Arrays.asList("N/A");
final List<String> fieldNames = Arrays.asList("pds:Airborne.pds", "pds:Facility.pds",
"pds:Instrument.pds", "pds:Instrument_Host.pds", "pds:Investigation.pds",
"pds:Resource.pds", "pds:Target.pds");
// backup old file
try {
if (registeredProductsFile.exists()) {
copyFile(registeredProductsFile, new File(System.getProperty("resources.home")
+ File.separator + ToolInfo.getOutputFileName() + ".backup"));
}
if (registeredProductsFile.exists()) {
copyFile(registeredProductsFile, new File(contextJsonFilePath + ".backup"));
}
} catch (IOException e) {
e.printStackTrace();
}

JsonWriter jsonWriter;
try {
jsonWriter = new JsonWriter(new FileWriter(
System.getProperty("resources.home") + File.separator + ToolInfo.getOutputFileName()));

jsonWriter = new JsonWriter(new FileWriter(contextJsonFilePath));
jsonWriter.setIndent(" ");
jsonWriter.beginObject(); // start Product_Context
jsonWriter.name("Product_Context");
jsonWriter.beginArray();
for (SolrDocument document : docs) {
String id = (String) document.getFirstValue("identifier");
String ver = (String) document.getFirstValue("version_id");
String data_type = (String) document.getFirstValue("data_product_type");
List<Object> names =
(ArrayList<Object>) document.getFieldValues(data_type.toLowerCase() + "_name");
List<Object> types =
(ArrayList<Object>) document.getFieldValues(data_type.toLowerCase() + "_type");

jsonWriter.beginObject(); // start a product

jsonWriter.name("name");
jsonWriter.beginArray();
if (names == null) {
jsonWriter.value("N/A");
} else {
for (Object n : names) {
jsonWriter.value((String) n);
}
}
jsonWriter.endArray();

jsonWriter.name("type");
jsonWriter.beginArray();
if (types == null) {
jsonWriter.value("N/A");
} else {
for (Object t : types) {
jsonWriter.value((String) t);
for (Map<String, Object> document : documents) {
@SuppressWarnings("unchecked")
Map<String, Object> properties = (Map<String, Object>) document.get("properties");
@SuppressWarnings("unchecked")
String lidvid = ((List<String>) properties.get("lidvid")).get(0);
for (String fieldName : fieldNames) {
if (properties.containsKey(fieldName + ":name")
|| properties.containsKey(fieldName + ":type")) {
@SuppressWarnings("unchecked")
List<Object> names = (List<Object>) properties.getOrDefault(fieldName + ":name", empty);
@SuppressWarnings("unchecked")
List<Object> types = (List<Object>) properties.getOrDefault(fieldName + ":type", empty);
jsonWriter.beginObject(); // start a product
jsonWriter.name("name");
jsonWriter.beginArray();
for (Object n : names) {
jsonWriter.value((String) n);
}
jsonWriter.endArray();
jsonWriter.name("type");
jsonWriter.beginArray();
for (Object t : types) {
jsonWriter.value((String) t);
}
jsonWriter.endArray();
jsonWriter.name("lidvid").value(lidvid);
jsonWriter.endObject(); // end a product
}
}
jsonWriter.endArray();

jsonWriter.name("lidvid").value(id + "::" + ver);
jsonWriter.endObject(); // end a product
}
jsonWriter.endArray();
jsonWriter.endObject(); // end Product_Context
Expand Down Expand Up @@ -683,12 +695,15 @@ public void query(File configuration) throws ConfigurationException {
while (keys.hasNext()) {
String key = keys.next();
if (!ConfigKey.ALL_KEYWORDS.contains(key)) {
if (unknowns.isBlank()) unknowns = key;
else unknowns += ", " + key;
if (unknowns.isBlank())
unknowns = key;
else
unknowns += ", " + key;
}
}
if (!unknowns.isBlank()) {
throw new UnrecognizedOptionException("Unrecognized keyword(s) in given configuration file: " + unknowns);
throw new UnrecognizedOptionException(
"Unrecognized keyword(s) in given configuration file: " + unknowns);
}

List<String> targetList = new ArrayList<>();
Expand Down Expand Up @@ -852,6 +867,8 @@ public void setAdditionalPaths(List<String> additionalPaths) throws MalformedURL
// must be further split using comma inside the for loop below.
LOG.debug("setAdditionalPaths:additionalPaths {},{}", additionalPaths, additionalPaths.size());
this.alternateReferentialPaths.clear();
while (alternateReferentialPaths.remove("")) {
}
for (String pathEntries : additionalPaths) {
LOG.debug("setAdditionalPaths:pathEntries {}", pathEntries);
// The value of pathEntries is a list of comma-separated values.
Expand Down Expand Up @@ -1303,7 +1320,8 @@ public void setupReport() throws IOException {
report.addParameter("userSpecifiedCatalogs", "User Specified Catalogs", catalogs.toString());
}
if (!schematrons.isEmpty()) {
report.addParameter("userSpecifiedSchematrons", "User Specified Schematrons", schematrons.toString());
report.addParameter("userSpecifiedSchematrons", "User Specified Schematrons",
schematrons.toString());
}
report.addParameter("severityLevel", "Severity Level", severity.getName());
report.addParameter("recurseDirectories", "Recurse Directories", String.valueOf(traverse));
Expand All @@ -1315,11 +1333,15 @@ public void setupReport() throws IOException {
* } else { report.addParameter(" Force Mode off"); }
*/
if (checksumManifest != null) {
report.addParameter("checksumManifestFile", "Checksum Manifest File", checksumManifest.toString());
report.addParameter("manifestFileBasePath", "Manifest File Base Path", manifestBasePath.toString());
}
report.addParameter("dataContentValidation", "Data Content Validation", contentValidationFlag ? "on" : "off");
report.addParameter("productLevelValidation", "Product Level Validation", skipProductValidation ? "off" : "on");
report.addParameter("checksumManifestFile", "Checksum Manifest File",
checksumManifest.toString());
report.addParameter("manifestFileBasePath", "Manifest File Base Path",
manifestBasePath.toString());
}
report.addParameter("dataContentValidation", "Data Content Validation",
contentValidationFlag ? "on" : "off");
report.addParameter("productLevelValidation", "Product Level Validation",
skipProductValidation ? "off" : "on");
if (everyN != 1) {
report.addParameter("dataEveryN", "Data Every N", String.valueOf(everyN));
}
Expand All @@ -1334,13 +1356,15 @@ public void setupReport() throws IOException {
}
if (validationRule != null && (validationRule.equalsIgnoreCase("pds4.bundle")
|| validationRule.equalsIgnoreCase("pds4.collection"))) {
report.addParameter("allowUnlabeledFiles", "Allow Unlabeled Files", String.valueOf(allowUnlabeledFiles));
report.addParameter("allowUnlabeledFiles", "Allow Unlabeled Files",
String.valueOf(allowUnlabeledFiles));
}
report.addParameter("maxErrors", "Max Errors", String.valueOf(maxErrors));
report.addParameter("registeredContextsFile", "Registered Contexts File", registeredProductsFile.toString());
report.addParameter("registeredContextsFile", "Registered Contexts File",
registeredProductsFile.toString());
if (nonRegisteredProductsFile != null) {
report
.addParameter("nonRegisteredContextsFile", "Non Registered Contexts File ", nonRegisteredProductsFile.toString());
report.addParameter("nonRegisteredContextsFile", "Non Registered Contexts File ",
nonRegisteredProductsFile.toString());
}
report.printHeader();
report.startBody("Product Level Validation Results");
Expand Down
6 changes: 3 additions & 3 deletions src/main/resources/validate.properties
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ validate.date=${buildNumber}
# TODO: Is this acceptable for validate.copyright?
validate.copyright=\nCopyright 2019, by the California Institute of Technology ("Caltech").\nAll rights reserved.

validate.search_url=https://pds.nasa.gov/services/search
validate.search_url=https://pds.nasa.gov/api
validate.output_file_name=registered_context_products.json
validate.endpoint=search
validate.query=product_class:Product_Context AND -data_class:Resource AND -data_class:PDS_Affiliate
validate.endpoint=search/1/products
validate.query=(product_class eq "Product_Context" and pds:Resource.pds:type ne "Information.Science_Portal" and pds:Resource.pds:type ne "Information.Investigation")