forked from IQSS/dataverse
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(solrteur): introduce state machine for TSV file layout IQSS#7662
Our custom metadata block TSV files follow a certain order of things. We also do not allow for repetitions or similar. All of this can most easily be depicted with a state machine, so we know where to send a line to for parsing. This commit also adds the very basic (empty) POJOs to store the block, fields and vocabularies in, to enable testing the state transitions. It also adds the constants we rely on, such as the trigger character, the comment indicator and the field delimiter.
- Loading branch information
1 parent
39acda2
commit a5efc7e
Showing
6 changed files
with
151 additions
and
0 deletions.
There are no files selected for viewing
12 changes: 12 additions & 0 deletions
12
modules/solr-configset/src/main/scripts/cli/util/model/Block.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package cli.util.model; | ||
|
||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
public final class Block { | ||
public static final String TRIGGER = Constants.TRIGGER_INDICATOR + "metadataBlock"; | ||
|
||
private Block() {} | ||
|
||
Optional<List<Field>> fields = Optional.empty(); | ||
} |
7 changes: 7 additions & 0 deletions
7
modules/solr-configset/src/main/scripts/cli/util/model/Constants.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package cli.util.model; | ||
|
||
/**
 * Shared literals describing the layout of a custom metadata block TSV file.
 *
 * <p>This is a pure constants holder: it is {@code final} and has a private constructor
 * so it can neither be instantiated nor subclassed.
 */
public final class Constants {
    /** Prefix that marks a line as a comment. */
    public static final String COMMENT_INDICATOR = "%%";
    /** Prefix that marks a line as a section header (combined with a keyword to form a trigger). */
    public static final String TRIGGER_INDICATOR = "#";
    /** Delimiter between the columns of a line (a single tab character). */
    public static final String COLUMN_SEPARATOR = "\t";

    private Constants() {
        // constants holder, not meant to be instantiated
    }
}
5 changes: 5 additions & 0 deletions
5
modules/solr-configset/src/main/scripts/cli/util/model/ControlledVocabulary.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package cli.util.model; | ||
|
||
public class ControlledVocabulary { | ||
public static final String TRIGGER = Constants.TRIGGER_INDICATOR + "controlledVocabulary"; | ||
} |
12 changes: 12 additions & 0 deletions
12
modules/solr-configset/src/main/scripts/cli/util/model/Field.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package cli.util.model; | ||
|
||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
public class Field { | ||
public static final String TRIGGER = Constants.TRIGGER_INDICATOR + "datasetField"; | ||
|
||
private Field() {} | ||
|
||
Optional<List<ControlledVocabulary>> controlledVocabularyValues = Optional.empty(); | ||
} |
45 changes: 45 additions & 0 deletions
45
modules/solr-configset/src/main/scripts/cli/util/model/ParsingState.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package cli.util.model; | ||
|
||
import cli.util.TsvBlockReader; | ||
|
||
public enum ParsingState { | ||
Vocabularies(ControlledVocabulary.TRIGGER), | ||
Fields(Field.TRIGGER, Vocabularies), | ||
MetadataBlock(Block.TRIGGER, Fields), | ||
// This state is only used exactly once and should never be reached from input. | ||
// For safety, make the validation fail. | ||
Init(Constants.COMMENT_INDICATOR, MetadataBlock); | ||
|
||
private final String stateTrigger; | ||
private final ParsingState nextState; | ||
|
||
ParsingState(String trigger, ParsingState next) { | ||
this.stateTrigger = trigger; | ||
this.nextState = next; | ||
} | ||
|
||
/** | ||
* Create final state (no next step) | ||
* @param trigger | ||
*/ | ||
ParsingState(String trigger) { | ||
this.stateTrigger = trigger; | ||
this.nextState = this; | ||
} | ||
|
||
public boolean isAllowedFinalState() { | ||
return this == Fields || this == Vocabularies; | ||
} | ||
|
||
public ParsingState transitionState(String headerLine) throws ParserException { | ||
// if not null, not starting the same state again (no loops allowed) and starting the correct next state, return the next state | ||
if(headerLine != null && ! headerLine.startsWith(this.stateTrigger) && | ||
headerLine.startsWith(this.nextState.stateTrigger)) { | ||
return this.nextState; | ||
} | ||
// otherwise throw a parsing exception | ||
throw new ParserException("Invalid header '" + | ||
(headerLine == null ? "null" : headerLine.substring(0, Math.min(25, headerLine.length()))) + | ||
"...' while in section '" + this.stateTrigger + "'"); | ||
} | ||
} |
70 changes: 70 additions & 0 deletions
70
modules/solr-configset/src/test/java/cli/util/model/ParsingStateTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package cli.util.model; | ||
|
||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.Arguments; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
|
||
import java.util.stream.Stream; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertThrows; | ||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
class ParsingStateTest { | ||
|
||
static Stream<Arguments> failingStateTransitionExamples() { | ||
return Stream.of( | ||
Arguments.of(ParsingState.Init, null), | ||
Arguments.of(ParsingState.MetadataBlock, null), | ||
Arguments.of(ParsingState.Fields, null), | ||
Arguments.of(ParsingState.Vocabularies, null), | ||
|
||
Arguments.of(ParsingState.Init, ""), | ||
Arguments.of(ParsingState.MetadataBlock, ""), | ||
Arguments.of(ParsingState.Fields, ""), | ||
Arguments.of(ParsingState.Vocabularies, ""), | ||
|
||
Arguments.of(ParsingState.Init, "foobar"), | ||
Arguments.of(ParsingState.MetadataBlock, "foobar"), | ||
Arguments.of(ParsingState.Fields, "foobar"), | ||
Arguments.of(ParsingState.Vocabularies, "foobar"), | ||
|
||
Arguments.of(ParsingState.Init, Constants.TRIGGER_INDICATOR), | ||
Arguments.of(ParsingState.Init, Constants.COMMENT_INDICATOR), | ||
Arguments.of(ParsingState.Init, Constants.COLUMN_SEPARATOR), | ||
|
||
Arguments.of(ParsingState.Init, Field.TRIGGER), | ||
Arguments.of(ParsingState.Init, ControlledVocabulary.TRIGGER), | ||
|
||
Arguments.of(ParsingState.MetadataBlock, Constants.COMMENT_INDICATOR), | ||
Arguments.of(ParsingState.MetadataBlock, ControlledVocabulary.TRIGGER), | ||
|
||
Arguments.of(ParsingState.Fields, Constants.COMMENT_INDICATOR), | ||
Arguments.of(ParsingState.Fields, Block.TRIGGER), | ||
|
||
Arguments.of(ParsingState.Vocabularies, Constants.COMMENT_INDICATOR), | ||
Arguments.of(ParsingState.Vocabularies, Block.TRIGGER), | ||
Arguments.of(ParsingState.Vocabularies, Field.TRIGGER) | ||
); | ||
} | ||
|
||
@ParameterizedTest | ||
@MethodSource("failingStateTransitionExamples") | ||
void failingTransitions(ParsingState source, String triggerLine) throws ParserException { | ||
ParserException ex = assertThrows(ParserException.class, () -> source.transitionState(triggerLine)); | ||
} | ||
|
||
static Stream<Arguments> successfulStateTransitionExamples() { | ||
return Stream.of( | ||
Arguments.of(ParsingState.Init, Block.TRIGGER, ParsingState.MetadataBlock), | ||
Arguments.of(ParsingState.MetadataBlock, Field.TRIGGER, ParsingState.Fields), | ||
Arguments.of(ParsingState.Fields, ControlledVocabulary.TRIGGER, ParsingState.Vocabularies) | ||
); | ||
} | ||
|
||
@ParameterizedTest | ||
@MethodSource("successfulStateTransitionExamples") | ||
void successfulTransitions(ParsingState source, String triggerLine, ParsingState expected) throws ParserException { | ||
assertEquals(expected, source.transitionState(triggerLine)); | ||
} | ||
|
||
} |