From 7fc08b0611c6f2f77d939844dab1a0b9b75c4616 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 26 Apr 2022 19:09:31 +0200
Subject: [PATCH] feat(solrteur): introduce a validator helper class #7662

The TSV parser needs to verify if a certain line is a header line
matching the spec. To avoid duplicated validation code, this
validator can be used with an arbitrary list of strings (so it can be
reused for blocks, fields and vocabularies).

As we will need to validate URLs in certain fields, this validator
also offers a helper function to create predicates checking for valid
URLs.
---
 .../scripts/cli/util/model/Validator.java     | 118 ++++++++++++++++++
 .../java/cli/util/model/ValidatorTest.java    |  96 ++++++++++++++
 2 files changed, 214 insertions(+)
 create mode 100644 modules/solr-configset/src/main/scripts/cli/util/model/Validator.java
 create mode 100644 modules/solr-configset/src/test/java/cli/util/model/ValidatorTest.java

diff --git a/modules/solr-configset/src/main/scripts/cli/util/model/Validator.java b/modules/solr-configset/src/main/scripts/cli/util/model/Validator.java
new file mode 100644
index 00000000000..8a6211797cf
--- /dev/null
+++ b/modules/solr-configset/src/main/scripts/cli/util/model/Validator.java
@@ -0,0 +1,118 @@
+package cli.util.model;
+
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+public class Validator {
+
+    /**
+     * Test if a given value is a valid {@link java.net.URL}.
+     *
+     * Remember, Java only supports HTTP/S, file and JAR protocols by default!
+     * Any URL not using such a protocol will not be considered a valid URL!
+     *
+     * @param url The value to test (may be null, which is never a valid URL)
+     * @return True if valid URL, false otherwise
+     * @see java.net.URL URL Constructor Summary
+     */
+    public static boolean isValidUrl(String url) {
+        try {
+            new URL(url).toURI();
+            return true;
+        } catch (MalformedURLException | URISyntaxException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Split and validate a textual line declared to be a header of custom metadata block definition section
+     * (the block, the fields or controlled vocabularies). Will return a list of the headers found, but only
+     * when the line is a spec-conforming header line.
+     *
+     * As this function retrieves the relevant spec parts as parameters, it can be reused for all sections.
+     * You will need to transform the resulting list into real Header enum values within calling code.
+     *
+     * This validator is strict with naming and order of appearance (must be same as spec), but is lenient
+     * about case (so you might use camel/pascal case variants).
+     *
+     * @param headerLine The textual line to analyse.
+     * @param startsWith A String which needs to be present at the start of the headerLine (compared ignoring
+     *                   case, like the column headers). Pass null to skip this check.
+     * @param validOrderedHeaders A list of Strings with the column headers from the spec in order of appearance.
+     * @return An unmodifiable list of the found headers in normalized form if matching the spec
+     * @throws ParserException If any validation fails. Contains sub-exceptions with validation details.
+     */
+    static List<String> validateHeaderLine(final String headerLine,
+                                           final String startsWith,
+                                           final List<String> validOrderedHeaders) throws ParserException {
+        // start a parenting parser exception to be filled with errors as subexceptions
+        ParserException ex = new ParserException("contains an invalid column header");
+
+        if (headerLine == null || headerLine.isBlank()) {
+            ex.addSubException("Header may not be null, empty or whitespace only");
+            throw ex;
+        }
+
+        // the line must begin with the section trigger (lenient about case, like the column headers)
+        if (startsWith != null && !headerLine.regionMatches(true, 0, startsWith, 0, startsWith.length())) {
+            ex.addSubException("Header line does not start with '" + startsWith + "'.");
+        }
+
+        // the actual split and validate length
+        String[] headerSplit = headerLine.split(Constants.COLUMN_SEPARATOR);
+        // missing headers?
+        if (headerSplit.length < validOrderedHeaders.size()) {
+            ex.addSubException(
+                "Less fields (" + headerSplit.length + ") found than required (" + validOrderedHeaders.size() + ").");
+        } else if (headerSplit.length > validOrderedHeaders.size()) {
+            ex.addSubException(
+                "More fields (" + headerSplit.length + ") found than required (" + validOrderedHeaders.size() + ").");
+        }
+
+        // allocate a list of validated columns
+        List<String> validatedColumns = new ArrayList<>();
+
+        // iterate the found header values
+        for (int i = 0; i < headerSplit.length; i++) {
+            String columnHeader = headerSplit[i];
+
+            // is the value a valid one? (in order of appearance and existing, but ignoring case)
+            if (i < validOrderedHeaders.size() && validOrderedHeaders.get(i).equalsIgnoreCase(columnHeader)) {
+                // add as entry of validated and present headers (to be used for line mapping)
+                // BUT use the normalized variant (makes comparisons easier)
+                validatedColumns.add(validOrderedHeaders.get(i));
+            // when invalid, mark as such
+            } else {
+                ex.addSubException(
+                    "Column " + (i+1) + " contains '" + columnHeader + "', but spec expects " +
+                        (i < validOrderedHeaders.size() ? "'"+validOrderedHeaders.get(i)+"'" : "nothing") + " to be here."
+                );
+                // additional hint when valid, but accidentally already present
+                if (validatedColumns.stream().anyMatch(columnHeader::equalsIgnoreCase)) {
+                    ex.addSubException("Column " + (i+1) + " contains valid '" + columnHeader + "' already present.");
+                }
+            }
+        }
+
+        // when there are headers missing, report them
+        if ( validatedColumns.size() < validOrderedHeaders.size() ) {
+            for (int i = 0; i < validOrderedHeaders.size(); i++) {
+                String missingHeader = validOrderedHeaders.get(i);
+                if (validatedColumns.stream().noneMatch(missingHeader::equalsIgnoreCase)) {
+                    ex.addSubException("Missing column '" + missingHeader + "' from position " + (i+1) + ".");
+                }
+            }
+        }
+
+        // Will only return the header column mapping if and only if the validation did not find errors.
+        // use an unmodifiable version of the list to avoid accidents without notice. Else throw the exception.
+        if (ex.hasSubExceptions()) {
+            throw ex;
+        } else {
+            return List.copyOf(validatedColumns);
+        }
+    }
+}
diff --git a/modules/solr-configset/src/test/java/cli/util/model/ValidatorTest.java b/modules/solr-configset/src/test/java/cli/util/model/ValidatorTest.java
new file mode 100644
index 00000000000..71e2333e4d8
--- /dev/null
+++ b/modules/solr-configset/src/test/java/cli/util/model/ValidatorTest.java
@@ -0,0 +1,96 @@
+package cli.util.model;
+
+import cli.util.TsvBlockReader;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.NullAndEmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class ValidatorTest {
+
+    private static final Logger logger = Logger.getLogger(ValidatorTest.class.getCanonicalName());
+
+    @Nested
+    class UtilsTest {
+        @ParameterizedTest
+        @CsvSource(nullValues = "NULL",
+            value = {
+                "NULL,NULL",
+                "hello,hello",
+                "' hello',' hello'",
+                "' hello ',' hello '",
+                "' hello',' hello\t\t\t'",
+                "'\t\t\thello','\t\t\thello\t\t\t'",
+                "'\t\t\thello\ttest','\t\t\thello\ttest\t\t'",
+                "'\t\t\thello\ttest\t\t ','\t\t\thello\ttest\t\t '",
+            })
+        void trimming(String expected, String sut) {
+            assertEquals(expected, TsvBlockReader.rtrimColumns(sut));
+        }
+
+        @ParameterizedTest
+        @CsvSource(nullValues = "NULL",
+            value = {
+                "false,NULL",
+                "false,''",
+                "false,hello",
+                "false,https://",
+                "false,www.foo.bar",
+                "false,://foo.bar.com",
+                "true,https://wwww.foobar.com",
+                "true,https://wwww.foobar.com/hello",
+                "true,https://wwww.foobar.com:1214/hello",
+                "true,https://host/hello",
+            })
+        void urlValidation(boolean expected, String sut) {
+            assertEquals(expected, Validator.isValidUrl(sut));
+        }
+    }
+
+    @Nested
+    class ValidateBlockHeader {
+        List<String> blockHeaders = Block.Header.getHeaders();
+
+        @ParameterizedTest
+        @NullAndEmptySource
+        @ValueSource(strings = {
+            "hello",
+            "#metadataBlock test",
+            "#metadataBlock\tname\tdataverseAlias\tdisplayName",
+            "\t#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI",
+            "#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI\tfoobar",
+            "#metadataBlock\tname\tdataverseAlias\tdisplayName\tdisplayName\tblockURI",
+            "dataverseAlias\tdisplayName\tblockURI\t#metadataBlock\tname"
+        })
+        void validateHeaderLine_Block_Throws(String line) {
+            ParserException exception = assertThrows(ParserException.class, () -> Validator.validateHeaderLine(line, Block.TRIGGER, blockHeaders));
+            assertTrue(exception.hasSubExceptions());
+            logger.log(Level.FINE,
+                exception.getSubExceptions().stream().map(Throwable::getMessage).collect(Collectors.joining("\n"))
+            );
+        }
+
+        @ParameterizedTest
+        @ValueSource(strings = {
+            "#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI",
+            "#metadataBlock\tNAME\tDataversealias\tDisplayname\tBlockURI"
+        })
+        void validateHeaderLine_Block_True(String line) throws ParserException {
+            List<String> headers = Validator.validateHeaderLine(line, Block.TRIGGER, blockHeaders);
+            assertFalse(headers.isEmpty());
+            // we expect the normalized form, so the arrays should match!
+            assertEquals(blockHeaders, headers);
+        }
+    }
+}
\ No newline at end of file