Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PDS label file paths and manifest files as additional possible inputs #745

Merged
merged 6 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions src/main/java/gov/nasa/pds/validate/ri/CommandLineInterface.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ public class CommandLineInterface {
public CommandLineInterface() {
super();
this.opts = new Options();

// Disabling this argument for the time being since the Search API does not yet support authorized access
this.opts.addOption(Option.builder("A").argName("auth-file").desc(
"file with the URL and credential content to have full (all product states) read-only access to the registry API")
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build());

this.opts.addOption(Option.builder("a").argName("auth-file").desc(
"file with the URL and credential content to have full, direct read-only access to the search DB")
"file with the URL and credential content to have full, direct read-only access to the Registry OpenSearch DB")
.hasArg(true).longOpt("auth-opensearch").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("h").desc("show this text and exit").hasArg(false)
.longOpt("help").optionalArg(true).build());
Expand All @@ -40,10 +43,27 @@ public CommandLineInterface() {
}

public void help() {
new HelpFormatter().printHelp("ValidateReferenceIntegrity",
"\nChecks the search DB that all references exist. If the api-auth is provided, then it will also check that the registry API also finds all the references. For lidvid, multiple values can be given using a comma like 'urn:foo::1.0,urn:bar::2.0'.\n\n",
new HelpFormatter().printHelp("validate-refs LIDVID LABEL-FILEPATH MANIFEST-FILEPATH",
"\nChecks that (1) all product references within a given product and " +
"(2) any aggregrate product references (bundles -> collections -> products) " +
"exist in the Registry OpenSearch DB or Search API. \n\n" +
"Expected positional arguments are either a LIDVID, LABEL-FILEPATH, or MANIFEST-FILEPATH.\n" +
" - A LIDVID must start with urn:.\n" +
" - A LABEL-FILEPATH must be a well formed PDS XML file.\n" +
" - A MANIFEST-FILEPATH is one item per line with an item being a lidvid or label. Each line must be terminated by a LF.\n\n" +
"Multiple arguments may be given in any order, for example:\n" +
" > validate-refs urn:nasa:pds:foo::1.0 label.xml urn:nasa:pds:bar::2.0 manifest.txt\n\n",
opts,
"\nAn auth-file is either a text file of the Java property format with two variables: 'url' and 'credentials'. The 'url' property should be the complete base URL to the Registry Search endpoint or Search API, e.g. 'https://localhost:9876/base', and 'credentials' a path to a java property file with the user name, password, and other credential information as that used by harvest. Or it is an XML text file used by harvest with <registry> containing the 'auth' attribute.\n\n",
"\nAn auth-file is either a text file of the Java property format " +
"with two variables, 'url' and 'credentials': \n\n" +
" - The 'url' property is the complete base URL to the Registry OpenSearch endpoint or Search API\n" +
" * 'https://my-registry.es.amazonaws.com/_search'\n\n" +
" - The 'credentials' is the path to:\n" +
" * Harvest config file containing the necessary Registry OpenSearch authorization\n" +
" <registry url=\"http://localhost:9200\" index=\"registry\" auth=\"/path/to/auth.cfg\" />\n" +
" * Java Properties file with a 'user' and 'password' specified, for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n",
true);
}

Expand All @@ -68,13 +88,18 @@ public int process(String[] args)
if (cl.hasOption("verbose"))
loggerConfig.setLevel(Level.INFO);
ctx.updateLoggers();

// Disabling this argument for the time being since the Search API does not yet support authorized access
this.opts.addOption(Option.builder("A").argName("auth-file").desc(
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build());
if (!cl.hasOption("a"))
throw new ParseException(
"Not yet implemented. Must provide OpenSearch Registry authorization information.");
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information.");

if (cl.getArgList().size() < 1)
throw new ParseException("Must provide at least one LIDVID as a starting point.");
throw new ParseException("Must provide at least one LIDVID, Label file path, or manifest file path as a starting point.");
if (!cl.hasOption("A"))
log.warn("Using OpenSearch Registry to check references.");
log.warn("Using Registry OpenSearch Database to check references.");

if (cl.hasOption("t")) {
try {
Expand All @@ -90,7 +115,7 @@ public int process(String[] args)

this.log.info("Starting the reference integrity checks.");
try {
Engine engine = new Engine(cylinders, cl.getArgList(),
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()),
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch")));
engine.processQueueUntilEmpty();
Expand Down
70 changes: 70 additions & 0 deletions src/main/java/gov/nasa/pds/validate/ri/UserInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package gov.nasa.pds.validate.ri;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.transform.sax.SAXSource;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.InputSource;
import gov.nasa.pds.tools.util.LabelParser;
import gov.nasa.pds.tools.util.XMLExtractor;
import net.sf.saxon.om.TreeInfo;
import net.sf.saxon.tree.tiny.TinyNodeImpl;

/**
 * Converts the positional command line arguments into a flat list of identifiers.
 *
 * <p>Each argument is classified in this order:
 * <ol>
 *   <li>text starting with {@code urn:} is passed through unchanged;</li>
 *   <li>an existing {@code .xml}/{@code .lblx} file that parses as a label contributes the text
 *       of its first {@code logical_identifier} node;</li>
 *   <li>anything else is treated as a manifest file whose lines are classified by these same
 *       rules (so manifests may reference labels and other manifests).</li>
 * </ol>
 *
 * <p>Arguments that match none of the above are reported with a warning and skipped. Instances
 * carry mutable parsing state and are not intended to be shared between threads.
 */
class UserInput {
  private static final Logger log = LogManager.getLogger(UserInput.class);

  /**
   * Value read from the label most recently accepted by {@link #isLabel(String)}.
   *
   * <p>NOTE(review): despite the name, only the {@code logical_identifier} node is read here; no
   * version is appended. Confirm whether downstream code expects a full LIDVID.
   */
  public String labels_lidvid = "";

  /**
   * Canonical paths of the manifests currently being expanded. Used to break include cycles,
   * which would otherwise recurse until the stack overflows.
   */
  private final Set<String> activeManifests = new HashSet<String>();

  /**
   * Entry point: resolves every command line argument into identifiers.
   *
   * @param cliList positional arguments (LIDVIDs, label file paths, manifest file paths)
   * @return the identifiers found, in argument order; never {@code null}
   */
  public static List<String> toLidvids (List<String> cliList){
    return new UserInput().process (cliList);
  }

  /**
   * Reads {@code cliArg} as a manifest (one item per line) and resolves each line.
   *
   * @param cliArg path of the candidate manifest file
   * @return identifiers contributed by the manifest; empty when the file is missing, unreadable,
   *         or already being expanded further up the call chain
   */
  private List<String> expandManifest (String cliArg) {
    List<String> lidvids = new ArrayList<String>();
    File file = new File(cliArg);
    if (!file.isFile()) {
      // Previously dropped silently, which hid mistyped paths from the user.
      log.warn ("The argument '" + cliArg + "' does not look like a LIDVID, Label, or manifest file. Ignoring it.");
      return lidvids;
    }
    try {
      String manifestKey = file.getCanonicalPath();
      if (!this.activeManifests.add(manifestKey)) {
        log.warn ("The manifest '" + cliArg + "' is already being expanded (cyclic reference). Ignoring it.");
        return lidvids;
      }
      try {
        List<String> entries;
        // Read everything first so the file handle is released before recursing.
        try (Stream<String> lines = Files.lines(Paths.get(cliArg))) {
          entries = lines.collect(Collectors.toList());
        }
        lidvids.addAll (this.process(entries));
      } finally {
        // Only guard against cycles; the same manifest listed twice is still expanded twice.
        this.activeManifests.remove(manifestKey);
      }
    } catch (Exception e) {
      // Broad on purpose: covers I/O failures, undecodable content, and invalid path syntax.
      log.warn ("The argument '" + cliArg + "' does not look like a LIDVID, Label, or manifest file. Ignoring it.");
      log.debug ("Failure while reading '" + cliArg + "'", e);
    }
    return lidvids;
  }

  /**
   * Tests whether {@code cliArg} names a parseable label and, if so, stores the text of its first
   * {@code logical_identifier} node in {@link #labels_lidvid}.
   *
   * @param cliArg candidate label path
   * @return {@code true} only when the file has a label extension, exists, parses, and contains
   *         at least one {@code logical_identifier} node
   */
  private boolean isLabel (String cliArg) {
    if (!(cliArg.endsWith(".xml") || cliArg.endsWith (".lblx"))) {
      return false;
    }
    File file = new File(cliArg);
    if (!file.isFile()) {
      return false;
    }
    // NOTE(review): FileReader decodes with the platform charset rather than the encoding named
    // in the XML declaration - confirm this is acceptable for the labels in use.
    try (FileReader reader = new FileReader(file)) {
      SAXSource saxSource = new SAXSource(new InputSource(reader));
      TreeInfo docInfo = LabelParser.parse(saxSource); // Parses a label.
      XMLExtractor extractor = new XMLExtractor(docInfo.getRootNode());
      List<TinyNodeImpl> xmlModels = extractor.getNodesFromDoc("logical_identifier");
      if (xmlModels == null || xmlModels.isEmpty()) {
        return false;
      }
      this.labels_lidvid = xmlModels.get(0).getStringValue();
      return true;
    } catch (Exception e) {
      // Not a label after all; the caller falls back to treating the path as a manifest.
      log.debug ("Could not parse '" + cliArg + "' as a label", e);
      return false;
    }
  }

  /**
   * Classifies each item and collects the resulting identifiers.
   *
   * @param cliList command line arguments or manifest lines
   * @return identifiers in input order
   */
  private List<String> process (List<String> cliList){
    List<String> lidvids = new ArrayList<String>();
    for (String rawArg : cliList) {
      // Tolerate stray whitespace and blank lines, which are common in hand-edited manifests.
      String cliArg = rawArg == null ? "" : rawArg.trim();
      if (cliArg.isEmpty()) {
        continue;
      }
      if (cliArg.startsWith ("urn:")) {
        lidvids.add (cliArg);
      } else if (this.isLabel (cliArg)) {
        lidvids.add (this.labels_lidvid);
      } else {
        lidvids.addAll (this.expandManifest (cliArg));
      }
    }
    return lidvids;
  }
}
2 changes: 2 additions & 0 deletions src/test/resources/riut/manifest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
urn:nasa:pds:insight_rad::2.1
urn:nasa:pds:insight_rad::2.0