sentence : sentences) {
- sBegin = -1;
-
- for (String token : sentence) {
- tBegin = aText.indexOf(token, tEnd);
- tEnd = tBegin + token.length();
-
- if (sBegin == -1) {
- sBegin = tBegin;
- }
-
- createToken(aJCas, aZoneBegin + tBegin, aZoneBegin + tEnd);
- }
- sEnd = tEnd;
-
- createSentence(aJCas, aZoneBegin + sBegin, aZoneBegin + sEnd);
- }
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/main/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabeler.java b/dkpro-core-clearnlp-asl/src/main/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabeler.java
deleted file mode 100644
index 75b648343e..0000000000
--- a/dkpro-core-clearnlp-asl/src/main/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabeler.java
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static java.util.Arrays.asList;
-import static org.apache.commons.io.IOUtils.closeQuietly;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.apache.uima.fit.util.JCasUtil.selectCovered;
-import static org.apache.uima.util.Level.INFO;
-
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.stream.Collectors;
-import java.util.zip.GZIPInputStream;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.fit.util.FSCollectionFactory;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.CasConfigurableProviderBase;
-import org.dkpro.core.api.resources.CasConfigurableStreamProviderBase;
-
-import com.clearnlp.classification.model.StringModel;
-import com.clearnlp.component.AbstractComponent;
-import com.clearnlp.component.AbstractStatisticalComponent;
-import com.clearnlp.dependency.DEPArc;
-import com.clearnlp.dependency.DEPLib;
-import com.clearnlp.dependency.DEPNode;
-import com.clearnlp.dependency.DEPTree;
-import com.clearnlp.nlp.NLPGetter;
-import com.clearnlp.nlp.NLPMode;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg;
-import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink;
-import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
-import eu.openminted.share.annotations.api.Component;
-import eu.openminted.share.annotations.api.DocumentationResource;
-import eu.openminted.share.annotations.api.constants.OperationType;
-
-/**
- * ClearNLP semantic role labeller.
- */
-@Component(OperationType.ANNOTATOR_OF_SEMANTIC_ROLE_LABELS)
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@ResourceMetaData(name = "ClearNLP Semantic Role Labeler")
-@TypeCapability(
- inputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma",
- "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency"},
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred",
- "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg"}
- )
-public class ClearNlpSemanticRoleLabeler
- extends JCasAnnotator_ImplBase
-{
- /**
- * Write the tag set(s) to the log when a model is loaded.
- */
- public static final String PARAM_PRINT_TAGSET = ComponentParameters.PARAM_PRINT_TAGSET;
- @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = "false")
- protected boolean printTagSet;
-
- /**
- * Use this language instead of the document language to resolve the model.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- protected String language;
-
- /**
- * Variant of a model the model. Used to address a specific model if here are multiple models
- * for one language.
- */
- public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
- @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
- protected String variant;
-
- /**
- * Location from which the predicate identifier model is read.
- */
- public static final String PARAM_PRED_MODEL_LOCATION = "predModelLocation";
- @ConfigurationParameter(name = PARAM_PRED_MODEL_LOCATION, mandatory = false)
- protected String predModelLocation;
-
- /**
- * Location from which the roleset classification model is read.
- */
- public static final String PARAM_ROLE_MODEL_LOCATION = "roleModelLocation";
- @ConfigurationParameter(name = PARAM_ROLE_MODEL_LOCATION, mandatory = false)
- protected String roleModelLocation;
-
- /**
- * Location from which the semantic role labeling model is read.
- */
- public static final String PARAM_SRL_MODEL_LOCATION = "srlModelLocation";
- @ConfigurationParameter(name = PARAM_SRL_MODEL_LOCATION, mandatory = false)
- protected String srlModelLocation;
-
- /**
- * Normally the arguments point only to the head words of arguments in the dependency tree.
- * With this option enabled, they are expanded to the text covered by the minimal and maximal
- * token offsets of all descendants (or self) of the head word.
- *
- * Warning: this parameter should be used with caution! For one, if the descentants of a
- * head word cover a non-continuous region of the text, this information is lost. The arguments
- * will appear to be spanning a continuous region. For another, the arguments may overlap with
- * each other. E.g. if a sentence contains a relative clause with a verb, the subject of the
- * main clause may be recognized as a dependent of the verb and may cause the whole main
- * clause to be recorded in the argument.
- */
- public static final String PARAM_EXPAND_ARGUMENTS = "expandArguments";
- @ConfigurationParameter(name = PARAM_EXPAND_ARGUMENTS, mandatory = true, defaultValue = "false")
- protected boolean expandArguments;
-
-
- private CasConfigurableProviderBase predicateFinder;
-
- private CasConfigurableProviderBase roleSetClassifier;
-
- private CasConfigurableProviderBase roleLabeller;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- predicateFinder = new CasConfigurableStreamProviderBase()
- {
- {
- setContextObject(ClearNlpSemanticRoleLabeler.this);
-
- setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
- setDefault(ARTIFACT_ID, "${groupId}.clearnlp-model-pred-${language}-${variant}");
- setDefault(LOCATION,
- "classpath:/de/tudarmstadt/ukp/dkpro/core/clearnlp/lib/pred-${language}-${variant}.properties");
- setDefault(VARIANT, "ontonotes");
-
- setOverride(LOCATION, predModelLocation);
- setOverride(LANGUAGE, language);
- setOverride(VARIANT, variant);
- }
-
- @Override
- protected AbstractComponent produceResource(InputStream aStream)
- throws Exception
- {
- BufferedInputStream bis = null;
- ObjectInputStream ois = null;
- GZIPInputStream gis = null;
- try {
- gis = new GZIPInputStream(aStream);
- bis = new BufferedInputStream(gis);
- ois = new ObjectInputStream(bis);
- AbstractComponent component = NLPGetter.getComponent(ois,
- getAggregatedProperties().getProperty(LANGUAGE), NLPMode.MODE_PRED);
- printTags(NLPMode.MODE_PRED, component);
- return component;
- }
- catch (Exception e) {
- throw new IOException(e);
- }
- finally {
- closeQuietly(ois);
- closeQuietly(bis);
- closeQuietly(gis);
- }
- }
- };
-
- roleSetClassifier = new CasConfigurableStreamProviderBase()
- {
- {
- setContextObject(ClearNlpSemanticRoleLabeler.this);
-
- setDefault(ARTIFACT_ID, "${groupId}.clearnlp-model-role-${language}-${variant}");
- setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/clearnlp/lib/"
- + "role-${language}-${variant}.properties");
- setDefault(VARIANT, "ontonotes");
-
- setOverride(LOCATION, roleModelLocation);
- setOverride(LANGUAGE, language);
- setOverride(VARIANT, variant);
- }
-
- @Override
- protected AbstractComponent produceResource(InputStream aStream)
- throws Exception
- {
- BufferedInputStream bis = null;
- ObjectInputStream ois = null;
- GZIPInputStream gis = null;
- try {
- gis = new GZIPInputStream(aStream);
- bis = new BufferedInputStream(gis);
- ois = new ObjectInputStream(bis);
- AbstractComponent component = NLPGetter.getComponent(ois,
- getAggregatedProperties().getProperty(LANGUAGE), NLPMode.MODE_ROLE);
-
- printTags(NLPMode.MODE_ROLE, component);
- return component;
- }
- catch (Exception e) {
- throw new IOException(e);
- }
- finally {
- closeQuietly(ois);
- closeQuietly(bis);
- closeQuietly(gis);
- }
- }
- };
-
- roleLabeller = new CasConfigurableStreamProviderBase()
- {
- {
- setContextObject(ClearNlpSemanticRoleLabeler.this);
-
- setDefault(ARTIFACT_ID, "${groupId}.clearnlp-model-srl-${language}-${variant}");
- setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/clearnlp/lib/"
- + "srl-${language}-${variant}.properties");
- setDefault(VARIANT, "ontonotes");
-
- setOverride(LOCATION, srlModelLocation);
- setOverride(LANGUAGE, language);
- setOverride(VARIANT, variant);
- }
-
- @Override
- protected AbstractComponent produceResource(InputStream aStream)
- throws Exception
- {
- BufferedInputStream bis = null;
- ObjectInputStream ois = null;
- GZIPInputStream gis = null;
- try {
- gis = new GZIPInputStream(aStream);
- bis = new BufferedInputStream(gis);
- ois = new ObjectInputStream(bis);
- AbstractComponent component = NLPGetter.getComponent(ois,
- getAggregatedProperties().getProperty(LANGUAGE), NLPMode.MODE_SRL);
- printTags(NLPMode.MODE_SRL, component);
- return component;
- }
- catch (Exception e) {
- throw new IOException(e);
- }
- finally {
- closeQuietly(ois);
- closeQuietly(bis);
- closeQuietly(gis);
- }
- }
- };
- }
-
- @Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
- {
- predicateFinder.configure(aJCas.getCas());
- roleSetClassifier.configure(aJCas.getCas());
- roleLabeller.configure(aJCas.getCas());
-
- // Iterate over all sentences
- for (Sentence sentence : select(aJCas, Sentence.class)) {
- List tokens = selectCovered(aJCas, Token.class, sentence);
- DEPTree tree = new DEPTree();
-
- // Generate:
- // - DEPNode
- // - pos tags
- // - lemma
- for (int i = 0; i < tokens.size(); i++) {
- Token t = tokens.get(i);
- DEPNode node = new DEPNode(i + 1, tokens.get(i).getText());
- node.pos = t.getPos().getPosValue();
- node.lemma = t.getLemma().getValue();
- tree.add(node);
- }
-
- // Generate:
- // Dependency relations
- for (Dependency dep : selectCovered(Dependency.class, sentence)) {
- if (dep instanceof ROOT) {
- // #736 ClearNlpSemanticRoleLabelerTest gets caught in infinite loop
- // ClearNLP parser creates roots that do not have a head. We have to replicate
- // this here to avoid running into an endless loop.
- continue;
- }
-
- int headIndex = tokens.indexOf(dep.getGovernor());
- int tokenIndex = tokens.indexOf(dep.getDependent());
-
- DEPNode token = tree.get(tokenIndex + 1);
- DEPNode head = tree.get(headIndex + 1);
-
- token.setHead(head, dep.getDependencyType());
- }
-
- // For the root node
- for (int i = 0; i < tokens.size(); i++) {
- DEPNode parserNode = tree.get(i + 1);
- if (parserNode.getLabel() == null) {
- int headIndex = tokens.indexOf(null);
- DEPNode head = tree.get(headIndex + 1);
- parserNode.setHead(head, "root");
- }
- }
-
- // Do the SRL
- predicateFinder.getResource().process(tree);
- roleSetClassifier.getResource().process(tree);
- roleLabeller.getResource().process(tree);
-
- // Convert the results into UIMA annotations
- Map predicates = new HashMap<>();
- Map> predArgs = new HashMap<>();
-
- for (int i = 0; i < tokens.size(); i++) {
- DEPNode parserNode = tree.get(i + 1);
- Token argumentToken = tokens.get(i);
-
- for (DEPArc argPredArc : parserNode.getSHeads()) {
- Token predToken = tokens.get(argPredArc.getNode().id - 1);
-
- // Instantiate the semantic predicate annotation if it hasn't been done yet
- SemPred pred = predicates.get(predToken);
- if (pred == null) {
- // Create the semantic predicate annotation itself
- pred = new SemPred(aJCas, predToken.getBegin(), predToken.getEnd());
- pred.setCategory(argPredArc.getNode().getFeat(DEPLib.FEAT_PB));
- pred.addToIndexes();
- predicates.put(predToken, pred);
-
- // Prepare a list to store its arguments
- predArgs.put(pred, new ArrayList<>());
- }
-
- // Instantiate the semantic argument annotation
- SemArg arg = new SemArg(aJCas);
-
- if (expandArguments) {
- List descendents = parserNode.getDescendents(Integer.MAX_VALUE)
- .stream()
- .map(arc -> arc.getNode())
- .collect(Collectors.toList());
- descendents.add(parserNode);
- List descTokens = descendents.stream()
- .map(node -> tokens.get(node.id - 1))
- .collect(Collectors.toList());
- int begin = descTokens.stream().mapToInt(t -> t.getBegin()).min()
- .getAsInt();
- int end = descTokens.stream().mapToInt(t -> t.getEnd()).max().getAsInt();
- arg.setBegin(begin);
- arg.setEnd(end);
- }
- else {
- arg.setBegin(argumentToken.getBegin());
- arg.setEnd(argumentToken.getEnd());
- }
-
- arg.addToIndexes();
-
- SemArgLink link = new SemArgLink(aJCas);
- link.setRole(argPredArc.getLabel());
- link.setTarget(arg);
-
- // Remember to which predicate this argument belongs
- predArgs.get(pred).add(link);
- }
- }
-
- for (Entry> e : predArgs.entrySet()) {
- e.getKey().setArguments(FSCollectionFactory.createFSArray(aJCas, e.getValue()));
- }
- }
- }
-
- private void printTags(String aType, AbstractComponent aComponent)
- {
- if (printTagSet && (aComponent instanceof AbstractStatisticalComponent)) {
- AbstractStatisticalComponent component = (AbstractStatisticalComponent) aComponent;
-
- Set tagSet = new HashSet();
-
- for (StringModel model : component.getModels()) {
- tagSet.addAll(asList(model.getLabels()));
- }
-
- List tagList = new ArrayList(tagSet);
- Collections.sort(tagList);
-
- StringBuilder sb = new StringBuilder();
- sb.append("Model of " + aType + " contains [").append(tagList.size())
- .append("] tags: ");
-
- for (String tag : tagList) {
- sb.append(tag);
- sb.append(" ");
- }
- getContext().getLogger().log(INFO, sb.toString());
- }
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/main/resources/org/dkpro/core/clearnlp/lib/parser-default-variants.map b/dkpro-core-clearnlp-asl/src/main/resources/org/dkpro/core/clearnlp/lib/parser-default-variants.map
deleted file mode 100644
index 8c7589c3c4..0000000000
--- a/dkpro-core-clearnlp-asl/src/main/resources/org/dkpro/core/clearnlp/lib/parser-default-variants.map
+++ /dev/null
@@ -1 +0,0 @@
-en=ontonotes
diff --git a/dkpro-core-clearnlp-asl/src/scripts/build.xml b/dkpro-core-clearnlp-asl/src/scripts/build.xml
index 941045a528..0679428309 100644
--- a/dkpro-core-clearnlp-asl/src/scripts/build.xml
+++ b/dkpro-core-clearnlp-asl/src/scripts/build.xml
@@ -1,581 +1,581 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpLemmatizerTest.java b/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpLemmatizerTest.java
deleted file mode 100644
index 0a003c4abe..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpLemmatizerTest.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
-import static org.apache.uima.fit.util.JCasUtil.select;
-
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.testing.AssertAnnotations;
-import org.dkpro.core.testing.TestRunner;
-import org.junit.jupiter.api.Test;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
-
-public class ClearNlpLemmatizerTest
-{
- @Test
- public void testEnglish()
- throws Exception
- {
- // Assume.assumeTrue(Runtime.getRuntime().maxMemory() > 1200000000l);
-
- JCas jcas = runTest("en", "We need a very complicated example sentence , which "
- + "contains as many constituents and dependencies as possible .");
-
- String[] lemmas = { "we", "need", "a", "very", "complicated", "example", "sentence", ",",
- "which", "contain", "as", "many", "constituent", "and", "dependency", "as",
- "possible", "." };
-
- AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
- }
-
- @Test
- public void testUnderscore()
- throws Exception
- {
- JCas jcas = runTest("en", "foo _ bar");
-
- String[] lemmas = { "foo", "_", "bar" };
-
- AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
- }
-
- private JCas runTest(String aLanguage, String aText)
- throws Exception
- {
- AnalysisEngineDescription tagger = createEngineDescription(ClearNlpPosTagger.class);
- AnalysisEngineDescription lemma = createEngineDescription(ClearNlpLemmatizer.class);
-
- JCas jcas = TestRunner.runTest(createEngineDescription(tagger, lemma), aLanguage, aText);
-
- return jcas;
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpParserTest.java b/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpParserTest.java
deleted file mode 100644
index 5bf2695591..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpParserTest.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.opennlp.OpenNlpPosTagger;
-import org.dkpro.core.testing.AssertAnnotations;
-import org.dkpro.core.testing.TestRunner;
-import org.dkpro.core.testing.dumper.DependencyDumper;
-import org.junit.jupiter.api.Test;
-
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
-
-public class ClearNlpParserTest
-{
- static final String documentEnglish = "We need a very complicated example sentence , which " +
- "contains as many constituents and dependencies as possible .";
-
- @Test
- public void testEnglishDependencies()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
-
- JCas jcas = runTest("en", null, documentEnglish);
-
- String[] dependencies = new String[] {
- "[ 0, 2]Dependency(nsubj,basic) D[0,2](We) G[3,7](need)",
- "[ 3, 7]ROOT(ROOT,basic) D[3,7](need) G[3,7](need)",
- "[ 8, 9]Dependency(det,basic) D[8,9](a) G[35,43](sentence)",
- "[ 10, 14]Dependency(advmod,basic) D[10,14](very) G[15,26](complicated)",
- "[ 15, 26]Dependency(amod,basic) D[15,26](complicated) G[35,43](sentence)",
- "[ 27, 34]Dependency(nn,basic) D[27,34](example) G[35,43](sentence)",
- "[ 35, 43]Dependency(dobj,basic) D[35,43](sentence) G[3,7](need)",
- "[ 44, 45]Dependency(punct,basic) D[44,45](,) G[35,43](sentence)",
- "[ 46, 51]Dependency(nsubj,basic) D[46,51](which) G[52,60](contains)",
- "[ 52, 60]Dependency(rcmod,basic) D[52,60](contains) G[35,43](sentence)",
- "[ 61, 63]Dependency(prep,basic) D[61,63](as) G[52,60](contains)",
- "[ 64, 68]Dependency(amod,basic) D[64,68](many) G[69,81](constituents)",
- "[ 69, 81]Dependency(pobj,basic) D[69,81](constituents) G[61,63](as)",
- "[ 82, 85]Dependency(cc,basic) D[82,85](and) G[69,81](constituents)",
- "[ 86, 98]Dependency(conj,basic) D[86,98](dependencies) G[69,81](constituents)",
- "[ 99,101]Dependency(prep,basic) D[99,101](as) G[86,98](dependencies)",
- "[102,110]Dependency(amod,basic) D[102,110](possible) G[99,101](as)",
- "[111,112]Dependency(punct,basic) D[111,112](.) G[3,7](need)" };
-
- AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
- }
-
- @Test
- public void testEnglishMayo()
- throws Exception
- {
-// Assume.assumeTrue(Runtime.getRuntime().maxMemory() > 1200000000l);
-
- JCas jcas = runTest("en", "mayo", documentEnglish);
-
- String[] dependencies = new String[] {
- "[ 0, 2]Dependency(nsubj,basic) D[0,2](We) G[3,7](need)",
- "[ 3, 7]ROOT(ROOT,basic) D[3,7](need) G[3,7](need)",
- "[ 8, 9]Dependency(det,basic) D[8,9](a) G[35,43](sentence)",
- "[ 10, 14]Dependency(advmod,basic) D[10,14](very) G[15,26](complicated)",
- "[ 15, 26]Dependency(amod,basic) D[15,26](complicated) G[35,43](sentence)",
- "[ 27, 34]Dependency(nn,basic) D[27,34](example) G[35,43](sentence)",
- "[ 35, 43]Dependency(dobj,basic) D[35,43](sentence) G[3,7](need)",
- "[ 44, 45]Dependency(punct,basic) D[44,45](,) G[35,43](sentence)",
- "[ 46, 51]Dependency(nsubj,basic) D[46,51](which) G[52,60](contains)",
- "[ 52, 60]Dependency(rcmod,basic) D[52,60](contains) G[35,43](sentence)",
- "[ 61, 63]Dependency(prep,basic) D[61,63](as) G[52,60](contains)",
- "[ 64, 68]Dependency(amod,basic) D[64,68](many) G[69,81](constituents)",
- "[ 69, 81]Dependency(pobj,basic) D[69,81](constituents) G[61,63](as)",
- "[ 82, 85]Dependency(cc,basic) D[82,85](and) G[69,81](constituents)",
- "[ 86, 98]Dependency(conj,basic) D[86,98](dependencies) G[69,81](constituents)",
- "[ 99,101]Dependency(mark,basic) D[99,101](as) G[102,110](possible)",
- "[102,110]Dependency(advcl,basic) D[102,110](possible) G[52,60](contains)",
- "[111,112]Dependency(punct,basic) D[111,112](.) G[3,7](need)" };
-
- AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
- }
-
- private JCas runTest(String aLanguage, String aVariant, String aText)
- throws Exception
- {
- AnalysisEngineDescription engine = createEngineDescription(
- createEngineDescription(OpenNlpPosTagger.class),
- createEngineDescription(ClearNlpLemmatizer.class),
- createEngineDescription(ClearNlpParser.class,
- ClearNlpParser.PARAM_VARIANT, aVariant,
- ClearNlpParser.PARAM_PRINT_TAGSET, true),
- createEngineDescription(DependencyDumper.class));
-
- return TestRunner.runTest(engine, aLanguage, aText);
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpPosTaggerTest.java b/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpPosTaggerTest.java
deleted file mode 100644
index 5c5168797b..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpPosTaggerTest.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.testing.AssertAnnotations;
-import org.dkpro.core.testing.TestRunner;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
-
-public class ClearNlpPosTaggerTest
-{
- @Test
- public void testEnglish()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 1200000000l);
-
- runTest("en", null, "This is a test . \n",
- new String[] { "DT", "VBZ", "DT", "NN", "." },
- new String[] { "POS_DET", "POS_VERB", "POS_DET", "POS_NOUN", "POS_PUNCT" });
-
- runTest("en", null, "A neural net . \n",
- new String[] { "DT", "JJ", "NN", "." },
- new String[] { "POS_DET", "POS_ADJ", "POS_NOUN", "POS_PUNCT" });
-
- runTest("en", null, "John is purchasing oranges . \n",
- new String[] { "NNP", "VBZ", "VBG", "NNS", "." },
- new String[] { "POS_PROPN", "POS_VERB", "POS_VERB", "POS_NOUN", "POS_PUNCT" });
- }
-
- @Test
- public void testEnglishMayo()
- throws Exception
- {
- runTest("en", "mayo", "This is a test . \n",
- new String[] { "DT", "VBZ", "DT", "NN", "." },
- new String[] { "POS_DET", "POS_VERB", "POS_DET", "POS_NOUN", "POS_PUNCT" });
-
- runTest("en", "mayo", "A neural net . \n",
- new String[] { "DT", "JJ", "NN", "." },
- new String[] { "POS_DET", "POS_ADJ", "POS_NOUN", "POS_PUNCT" });
-
- runTest("en", "mayo", "John is purchasing oranges . \n",
- new String[] { "NNP", "VBZ", "VBG", "NNS", "." },
- new String[] { "POS_PROPN", "POS_VERB", "POS_VERB", "POS_NOUN", "POS_PUNCT" });
- }
-
- private void runTest(String language, String variant, String testDocument, String[] tags,
- String[] tagClasses)
- throws Exception
- {
- AnalysisEngine engine = createEngine(ClearNlpPosTagger.class,
- ClearNlpPosTagger.PARAM_VARIANT, variant,
- ClearNlpPosTagger.PARAM_PRINT_TAGSET, true);
-
- JCas jcas = TestRunner.runTest(engine, language, testDocument);
-
- AssertAnnotations.assertPOS(tagClasses, tags, select(jcas, POS.class));
- }
-
- @BeforeEach
- public void clearMemory()
- {
- Runtime.getRuntime().gc();
- Runtime.getRuntime().gc();
- Runtime.getRuntime().gc();
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSegmenterTest.java b/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSegmenterTest.java
deleted file mode 100644
index f15d3e1d64..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSegmenterTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.testing.harness.SegmenterHarness;
-import org.junit.jupiter.api.Test;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-
-public class ClearNlpSegmenterTest
-{
- @Test
- public void run() throws Throwable
- {
- AnalysisEngineDescription aed = createEngineDescription(ClearNlpSegmenter.class);
- SegmenterHarness.run(aed, "de.1", "de.2", "de.3", "de.4", "en.1", "en.7", "en.9", "ar.1",
- "zh.1", "zh.2");
- }
-
- /**
- * We had a bug where the token offsets were assigned wrong when one word was a suffix of the
- * previous word.
- */
- @Test
- public void testSuffix() throws Exception
- {
- JCas jcas = JCasFactory.createJCas();
- jcas.setDocumentLanguage("en");
- jcas.setDocumentText("this is is this is is");
-
- AnalysisEngine aed = createEngine(ClearNlpSegmenter.class);
- aed.process(jcas);
-
-
- List tokens = new ArrayList<>(select(jcas, Token.class));
- assertEquals(5, tokens.get(1).getBegin());
- assertEquals(7, tokens.get(1).getEnd());
-
- for (Token t : tokens) {
- System.out.printf("%d %d %s%n", t.getBegin(), t.getEnd(), t.getCoveredText());
- }
-
- }
-
- @Test
- public void testZoning() throws Exception
- {
- SegmenterHarness.testZoning(ClearNlpSegmenter.class);
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabelerTest.java b/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabelerTest.java
deleted file mode 100644
index 0dc8513d3f..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/java/org/dkpro/core/clearnlp/ClearNlpSemanticRoleLabelerTest.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.clearnlp;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.opennlp.OpenNlpPosTagger;
-import org.dkpro.core.testing.AssertAnnotations;
-import org.dkpro.core.testing.TestRunner;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred;
-
-public class ClearNlpSemanticRoleLabelerTest
-{
- static final String documentEnglish = "We need a very complicated example sentence , which "
- + "contains as many constituents and dependencies as possible .";
-
- @Test
- public void testEnglish()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
-
- JCas jcas = runTest("en", null, documentEnglish);
-
- String[] predicates = {
- "contains (contain.01): [(A0:sentence)(A1:as)(R-A0:which)]",
- "need (need.01): [(A0:We)(A1:sentence)]" };
-
- AssertAnnotations.assertSemPred(predicates, select(jcas, SemPred.class));
- }
-
- @Test
- public void testEnglishExpand()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
-
- JCas jcas = runTest("en", null, documentEnglish,
- ClearNlpSemanticRoleLabeler.PARAM_EXPAND_ARGUMENTS, true);
-
- String[] predicates = {
- "contains (contain.01): ["
- + "(A0:a very complicated example sentence , which contains as many constituents and dependencies as possible)"
- + "(A1:as many constituents and dependencies as possible)"
- + "(R-A0:which)]",
- "need (need.01): ["
- + "(A0:We)"
- + "(A1:a very complicated example sentence , which contains as many constituents and dependencies as possible)]"
- };
-
- AssertAnnotations.assertSemPred(predicates, select(jcas, SemPred.class));
- }
-
- @Test
- public void testEnglishExpand2()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
-
- JCas jcas = runTest("en", null, "The man was sued by Jacqueline Kennedy Onassis .",
- ClearNlpSemanticRoleLabeler.PARAM_EXPAND_ARGUMENTS, true);
-
- String[] predicates = { "sued (sue.01): [(A0:by Jacqueline Kennedy Onassis)(A1:The man)]" };
-
- AssertAnnotations.assertSemPred(predicates, select(jcas, SemPred.class));
- }
- @Test
- public void testEnglishMayo()
- throws Exception
- {
- assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
-
- JCas jcas = runTest("en", "mayo", documentEnglish);
-
- String[] predicates = {
- "contains (contain.01): [(A0:sentence)(A1:as)(R-A0:which)]",
- "need (need.01): [(A0:We)(A1:sentence)]" };
-
- AssertAnnotations.assertSemPred(predicates, select(jcas, SemPred.class));
- }
-
- private JCas runTest(String aLanguage, String aVariant, String aText, Object... aExtraParams)
- throws Exception
- {
- Object[] params = new Object[] {
- ClearNlpParser.PARAM_VARIANT, aVariant,
- ClearNlpParser.PARAM_PRINT_TAGSET, true};
- params = ArrayUtils.addAll(params, aExtraParams);
-
- AnalysisEngineDescription engine = createEngineDescription(
- createEngineDescription(OpenNlpPosTagger.class),
- createEngineDescription(ClearNlpLemmatizer.class),
- createEngineDescription(ClearNlpParser.class),
- createEngineDescription(ClearNlpSemanticRoleLabeler.class, params));
-
- return TestRunner.runTest(engine, aLanguage, aText);
- }
-
-
- @BeforeEach
- public void freeMemory()
- {
- Runtime.getRuntime().gc();
- Runtime.getRuntime().gc();
- Runtime.getRuntime().gc();
- }
-}
diff --git a/dkpro-core-clearnlp-asl/src/test/resources/log4j2.xml b/dkpro-core-clearnlp-asl/src/test/resources/log4j2.xml
deleted file mode 100644
index 19bf03b585..0000000000
--- a/dkpro-core-clearnlp-asl/src/test/resources/log4j2.xml
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/dkpro-core-io-web1t-asl/pom.xml b/dkpro-core-io-web1t-asl/pom.xml
index a756a0776f..1961ef878d 100644
--- a/dkpro-core-io-web1t-asl/pom.xml
+++ b/dkpro-core-io-web1t-asl/pom.xml
@@ -122,36 +122,18 @@