forked from IQSS/dataverse
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(solrteur): introduce a validator helper class IQSS#7662
The TSV parser needs to verify whether a given line is a header line and whether it matches the spec. To avoid duplicated validation code, this validator works on an arbitrary list of strings, so it can be reused for blocks, fields and vocabularies. As we will also need to validate URLs in certain fields, this validator additionally offers a helper function that checks whether a value is a valid URL.
- Loading branch information
1 parent
a5efc7e
commit 7fc08b0
Showing
2 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
112 changes: 112 additions & 0 deletions
112
modules/solr-configset/src/main/scripts/cli/util/model/Validator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package cli.util.model; | ||
|
||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class Validator { | ||
|
||
/** | ||
* Test if a given value is a valid {@link java.net.URL} | ||
* | ||
* Remember, Java only supports HTTP/S, file and JAR protocols by default! | ||
* Any URL not using such a protocol will not be considered a valid URL! | ||
* {@see <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URL.html#%3Cinit%3E(java.lang.String,java.lang.String,int,java.lang.String)">URL Constructor Summary</a>} | ||
* | ||
* @param url The value to test | ||
* @return True if valid URL, false otherwise | ||
*/ | ||
public static boolean isValidUrl(String url) { | ||
try { | ||
new URL(url).toURI(); | ||
return true; | ||
} catch (MalformedURLException | URISyntaxException e) { | ||
return false; | ||
} | ||
} | ||
|
||
/** | ||
* Split and validate a textual line declared to be a header of custom metadata block definition section | ||
* (the block, the fields or controlled vocabularies). Will return a list of the headers found (if they match) and | ||
* when being a spec conform header line. | ||
* | ||
* As this function retrieves the relevant spec parts as parameters, it can be reused for all sections. | ||
* You will need to transform into the resulting list into real Header enum values within calling code. | ||
* | ||
* This validator is strict with naming and order of appearance (must be same as spec), but is lenient | ||
* about case (so you might use camel/pascal case variants). | ||
* | ||
* @param headerLine The textual line to analyse. | ||
* @param startsWith A String which needs to be present at the start of the headerLine. | ||
* @param validOrderedHeaders A list of Strings with the column headers from the spec in order of appearance. | ||
* @return A list of the found headers in normalized form if matching the spec | ||
* @throws ParserException If any validation fails. Contains sub-exceptions with validation details. | ||
*/ | ||
static List<String> validateHeaderLine(final String headerLine, | ||
final String startsWith, | ||
final List<String> validOrderedHeaders) throws ParserException { | ||
// start a parenting parser exception to be filled with errors as subexceptions | ||
ParserException ex = new ParserException("contains an invalid column header"); | ||
|
||
if (headerLine == null || headerLine.isBlank()) { | ||
ex.addSubException("Header may not be null, empty or whitespace only"); | ||
throw ex; | ||
} | ||
|
||
// the actual split and validate length | ||
String[] headerSplit = headerLine.split(Constants.COLUMN_SEPARATOR); | ||
// missing headers? | ||
if (headerSplit.length < validOrderedHeaders.size()) { | ||
ex.addSubException( | ||
"Less fields (" + headerSplit.length + ") found than required (" + validOrderedHeaders.size() + ")."); | ||
} else if (headerSplit.length > validOrderedHeaders.size()) { | ||
ex.addSubException( | ||
"More fields (" + headerSplit.length + ") found than required (" + validOrderedHeaders.size() + ")."); | ||
} | ||
|
||
// allocate a list of validated columns | ||
List<String> validatedColumns = new ArrayList<>(); | ||
|
||
// iterate the found header values | ||
for (int i = 0; i < headerSplit.length; i++) { | ||
String columnHeader = headerSplit[i]; | ||
|
||
// is the value a valid one? (in order of appearance and existing, but ignoring case) | ||
if (i < validOrderedHeaders.size() && validOrderedHeaders.get(i).equalsIgnoreCase(columnHeader)) { | ||
// add as entry of validated and present headers (to be used for line mapping) | ||
// BUT use the normalized variant (makes comparisons easier) | ||
validatedColumns.add(validOrderedHeaders.get(i)); | ||
// when invalid, mark as such | ||
} else { | ||
ex.addSubException( | ||
"Column " + (i+1) + " contains '" + columnHeader + "', but spec expects " + | ||
(i < validOrderedHeaders.size() ? "'"+validOrderedHeaders.get(i)+"'" : "nothing") + " to be here." | ||
); | ||
// additional hint when valid, but accidentally already present | ||
if (validatedColumns.stream().anyMatch(columnHeader::equalsIgnoreCase)) { | ||
ex.addSubException("Column " + (i+1) + " contains valid '" + columnHeader + "' already present."); | ||
} | ||
} | ||
} | ||
|
||
// when there are headers missing, report them | ||
if ( validatedColumns.size() < validOrderedHeaders.size() ) { | ||
for (int i = 0; i < validOrderedHeaders.size(); i++) { | ||
String missingHeader = validOrderedHeaders.get(i); | ||
if (validatedColumns.stream().noneMatch(missingHeader::equalsIgnoreCase)) { | ||
ex.addSubException("Missing column '" + missingHeader + "' from position " + (i+1) + "."); | ||
} | ||
} | ||
} | ||
|
||
// Will only return the header column mapping if and only if the validation did not find errors. | ||
// use an unmodifiable version of the list to avoid accidents without notice. Else throw the exception. | ||
if (ex.hasSubExceptions()) { | ||
throw ex; | ||
} else { | ||
return List.copyOf(validatedColumns); | ||
} | ||
} | ||
} |
96 changes: 96 additions & 0 deletions
96
modules/solr-configset/src/test/java/cli/util/model/ValidatorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package cli.util.model; | ||
|
||
import cli.util.TsvBlockReader; | ||
import org.junit.jupiter.api.Nested; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.CsvSource; | ||
import org.junit.jupiter.params.provider.NullAndEmptySource; | ||
import org.junit.jupiter.params.provider.ValueSource; | ||
|
||
import java.util.List; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||
import static org.junit.jupiter.api.Assertions.assertThrows; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
class ValidatorTest { | ||
|
||
private static final Logger logger = Logger.getLogger(ValidatorTest.class.getCanonicalName()); | ||
|
||
@Nested | ||
class UtilsTest { | ||
@ParameterizedTest | ||
@CsvSource(nullValues = "NULL", | ||
value = { | ||
"NULL,NULL", | ||
"hello,hello", | ||
"' hello',' hello'", | ||
"' hello ',' hello '", | ||
"' hello',' hello\t\t\t'", | ||
"'\t\t\thello','\t\t\thello\t\t\t'", | ||
"'\t\t\thello\ttest','\t\t\thello\ttest\t\t'", | ||
"'\t\t\thello\ttest\t\t ','\t\t\thello\ttest\t\t '", | ||
}) | ||
void trimming(String expected, String sut) { | ||
assertEquals(expected, TsvBlockReader.rtrimColumns(sut)); | ||
} | ||
|
||
@ParameterizedTest | ||
@CsvSource(nullValues = "NULL", | ||
value = { | ||
"false,NULL", | ||
"false,''", | ||
"false,hello", | ||
"false,https://", | ||
"false,www.foo.bar", | ||
"false,://foo.bar.com", | ||
"true,https://wwww.foobar.com", | ||
"true,https://wwww.foobar.com/hello", | ||
"true,https://wwww.foobar.com:1214/hello", | ||
"true,https://host/hello", | ||
}) | ||
void urlValidation(boolean expected, String sut) { | ||
assertEquals(expected, Validator.isValidUrl(sut)); | ||
} | ||
} | ||
|
||
@Nested | ||
class ValidateBlockHeader { | ||
List<String> blockHeaders = Block.Header.getHeaders(); | ||
|
||
@ParameterizedTest | ||
@NullAndEmptySource | ||
@ValueSource(strings = { | ||
"hello", | ||
"#metadataBlock test", | ||
"#metadataBlock\tname\tdataverseAlias\tdisplayName", | ||
"\t#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI", | ||
"#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI\tfoobar", | ||
"#metadataBlock\tname\tdataverseAlias\tdisplayName\tdisplayName\tblockURI", | ||
"dataverseAlias\tdisplayName\tblockURI\t#metadataBlock\tname" | ||
}) | ||
void validateHeaderLine_Block_Throws(String line) { | ||
ParserException exception = assertThrows(ParserException.class, () -> Validator.validateHeaderLine(line, Block.TRIGGER, blockHeaders)); | ||
assertTrue(exception.hasSubExceptions()); | ||
logger.log(Level.FINE, | ||
exception.getSubExceptions().stream().map(Throwable::getMessage).collect(Collectors.joining("\n")) | ||
); | ||
} | ||
|
||
@ParameterizedTest | ||
@ValueSource(strings = { | ||
"#metadataBlock\tname\tdataverseAlias\tdisplayName\tblockURI", | ||
"#metadataBlock\tNAME\tDataversealias\tDisplayname\tBlockURI" | ||
}) | ||
void validateHeaderLine_Block_True(String line) throws ParserException { | ||
List<String> headers = Validator.validateHeaderLine(line, Block.TRIGGER, blockHeaders); | ||
assertFalse(headers.isEmpty()); | ||
// we expect the normalized form, so the arrays should match! | ||
assertEquals(blockHeaders, headers); | ||
} | ||
} | ||
} |