diff --git a/dkpro-core-asl/pom.xml b/dkpro-core-asl/pom.xml index 7d62f04642..b23f948aa7 100644 --- a/dkpro-core-asl/pom.xml +++ b/dkpro-core-asl/pom.xml @@ -157,7 +157,6 @@ experimental - ../dkpro-core-cogroo-asl ../dkpro-core-kuromoji-asl ../dkpro-core-io-annis-asl ../dkpro-core-io-gate-asl diff --git a/dkpro-core-bom-asl/pom.xml b/dkpro-core-bom-asl/pom.xml index db182bd760..80a2671872 100644 --- a/dkpro-core-bom-asl/pom.xml +++ b/dkpro-core-bom-asl/pom.xml @@ -568,11 +568,6 @@ dkpro-core-io-annis-asl 3.0.0-SNAPSHOT - - org.dkpro.core - dkpro-core-cogroo-asl - 3.0.0-SNAPSHOT - org.dkpro.core dkpro-core-kuromoji-asl diff --git a/dkpro-core-cogroo-asl/LICENSE.txt b/dkpro-core-cogroo-asl/LICENSE.txt deleted file mode 100644 index d645695673..0000000000 --- a/dkpro-core-cogroo-asl/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/dkpro-core-cogroo-asl/pom.xml b/dkpro-core-cogroo-asl/pom.xml deleted file mode 100644 index 8cd3116be5..0000000000 --- a/dkpro-core-cogroo-asl/pom.xml +++ /dev/null @@ -1,159 +0,0 @@ - - - 4.0.0 - - org.dkpro.core - dkpro-core-asl - 3.0.0-SNAPSHOT - ../dkpro-core-asl - - - dkpro-core-cogroo-asl - jar - DKPro Core ASL - CoGrOO (EXPERIMENTAL) - https://dkpro.github.io/dkpro-core/ - - - 4.0.0 - - - - - org.apache.uima - uimaj-core - - - org.apache.uima - uimafit-core - - - org.cogroo - cogroo-gc - ${cogroo.version} - - - org.cogroo - cogroo-ann - ${cogroo.version} - - - org.cogroo.lang.pt_br - cogroo-ann-pt_br - ${cogroo.version} - - - org.cogroo.lang.pt_br - cogroo-gc-pt_br - ${cogroo.version} - - - org.dkpro.core - dkpro-core-api-lexmorph-asl - ${project.version} - - - org.dkpro.core - dkpro-core-api-segmentation-asl - ${project.version} - - - org.dkpro.core - dkpro-core-api-ner-asl - ${project.version} - - - org.dkpro.core - dkpro-core-api-anomaly-asl - ${project.version} - - - org.dkpro.core - dkpro-core-api-resources-asl - ${project.version} - - - org.dkpro.core - dkpro-core-api-parameter-asl - ${project.version} - - - eu.openminted.share.annotations - omtd-share-annotations-api - - - org.dkpro.core - dkpro-core-testing-asl - ${project.version} - test - - - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - - org.cogroo.lang.pt_br:cogroo-ann-pt_br - org.cogroo.lang.pt_br:cogroo-gc-pt_br - - - - - - - - \ No newline at end of file diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooChecker.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooChecker.java deleted file mode 100644 index 998dac593c..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooChecker.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import java.io.IOException; -import java.net.URL; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.checker.CheckDocument; -import org.cogroo.checker.GrammarChecker; -import org.cogroo.entities.Mistake; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.ModelProviderBase; - -import de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.GrammarAnomaly; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * Detect grammatical errors in text using CoGrOO. - */ -@Component(OperationType.GRAMMAR_CHECKER) -@ResourceMetaData(name = "CoGrOO Grammar Checker") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability( - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.GrammarAnomaly" }) -public class CogrooChecker - extends JCasAnnotator_ImplBase -{ - public static enum DetailLevel { - SHORT, LONG, FULL - } - - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - private String language; - - /** - * Set detail level. - */ - public static final String PARAM_DETAIL_LEVEL = "detailLevel"; - @ConfigurationParameter(name = PARAM_DETAIL_LEVEL, mandatory = true, defaultValue = "SHORT") - private DetailLevel detailLevel; - - private ModelProviderBase modelProvider; - - @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException - { - super.initialize(aContext); - - modelProvider = new ModelProviderBase() { - { - setContextObject(CogrooChecker.this); - setDefault(LOCATION, NOT_REQUIRED); - - setOverride(LANGUAGE, language); - } - - @Override - protected GrammarChecker produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - if (!"pt".equals(props.getProperty(LANGUAGE))) { - throw new IOException("The language code '" - + props.getProperty(LANGUAGE) + "' is not supported by LanguageTool."); - } - - ComponentFactory factory = ComponentFactory.create(new Locale("pt", "BR")); - return new GrammarChecker(factory.createPipe()); - } - }; - } - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - modelProvider.configure(aJCas.getCas()); - - // get document text - CheckDocument document = new CheckDocument(aJCas.getDocumentText()); - - modelProvider.getResource().analyze(document); - for (Mistake match : document.getMistakes()) { - // create annotation - GrammarAnomaly annotation = new GrammarAnomaly(aJCas); - annotation.setBegin(match.getStart()); - annotation.setEnd(match.getEnd()); - switch (detailLevel) { - case SHORT: - annotation.setDescription(match.getShortMessage()); - break; - case LONG: - annotation.setDescription(match.getLongMessage()); - break; - case FULL: - annotation.setDescription(match.getFullMessage()); - break; - } - annotation.addToIndexes(); - if (getLogger().isTraceEnabled()) { - getLogger().trace("Found: " + annotation); - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooFeaturizer.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooFeaturizer.java deleted file mode 100644 index 21183eb5de..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooFeaturizer.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static java.util.Arrays.asList; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.apache.uima.fit.util.JCasUtil.selectCovered; - -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.text.Document; -import org.cogroo.text.impl.DocumentImpl; -import org.cogroo.text.impl.SentenceImpl; -import org.cogroo.text.impl.TokenImpl; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.CasConfigurableProviderBase; -import org.dkpro.core.api.resources.ModelProviderBase; - -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * Morphological analyzer using CoGrOO. - */ -@Component(OperationType.MORPHOLOGICAL_TAGGER) -@ResourceMetaData(name = "CoGrOO Morphological Analyzer") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability( - inputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", - "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }, - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures" }) - -public class CogrooFeaturizer - extends JCasAnnotator_ImplBase -{ - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - protected String language; - - private CasConfigurableProviderBase modelProvider; - - @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException - { - super.initialize(aContext); - - modelProvider = new ModelProviderBase() { - { - setContextObject(CogrooFeaturizer.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - - String language = props.getProperty(LANGUAGE); - - if (!"pt".equals(language)) { - throw new IOException("The language code '" + language - + "' is not supported by LanguageTool."); - } - - ComponentFactory factory = ComponentFactory.create(new Locale("pt", "BR")); - - return factory.createFeaturizer(); - } - }; - } - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - modelProvider.configure(cas); - - // This is actually quite some overhead, because internally Cogroo is just using a - // OpenNLP classifier which simply takes a token and pos tag and returnes a list of - // features. It would be much more efficient to use the classifier directly. - - for (Sentence sentence : select(aJCas, Sentence.class)) { - // We set up one CoGrOO document for each sentence. That makes it easier to maintain - // a list of tokens of the sentence, which we later need to attached the lemmata to the - // tokens. - - // Construct the document - Document doc = new DocumentImpl(); - doc.setText(aJCas.getDocumentText()); - - // Extract the sentence and its tokens - org.cogroo.text.Sentence cSent = new SentenceImpl(sentence.getBegin(), - sentence.getEnd(), doc); - List cTokens = new ArrayList(); - List dTokens = selectCovered(Token.class, sentence); - for (Token dTok : dTokens) { - TokenImpl cTok = new TokenImpl(dTok.getBegin() - sentence.getBegin(), - dTok.getEnd() - sentence.getBegin(), dTok.getText()); - cTok.setPOSTag(dTok.getPos().getPosValue()); - cTokens.add(cTok); - } - cSent.setTokens(cTokens); - doc.setSentences(asList(cSent)); - - // Process - modelProvider.getResource().analyze(doc); - - assert cSent.getTokens().size() == dTokens.size(); - - // Convert from CoGrOO to UIMA model - Iterator dTokIt = dTokens.iterator(); - for (org.cogroo.text.Token cTok : cSent.getTokens()) { - Token dTok = dTokIt.next(); - MorphologicalFeatures m = new MorphologicalFeatures(aJCas, cSent.getStart() - + cTok.getStart(), cSent.getStart() + cTok.getEnd()); - m.setValue(cTok.getFeatures()); - m.addToIndexes(); - dTok.setMorph(m); - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooLemmatizer.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooLemmatizer.java deleted file mode 100644 index f63e1fa81f..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooLemmatizer.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static java.util.Arrays.asList; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.apache.uima.fit.util.JCasUtil.selectCovered; - -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.text.Document; -import org.cogroo.text.impl.DocumentImpl; -import org.cogroo.text.impl.SentenceImpl; -import org.cogroo.text.impl.TokenImpl; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.CasConfigurableProviderBase; -import org.dkpro.core.api.resources.ModelProviderBase; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * Lemmatizer using CoGrOO. - */ -@Component(OperationType.LEMMATIZER) -@ResourceMetaData(name = "CoGrOO Lemmatizer") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability( - inputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", - "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }, - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma" }) - -public class CogrooLemmatizer - extends JCasAnnotator_ImplBase -{ - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - protected String language; - - private CasConfigurableProviderBase modelProvider; - - @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException - { - super.initialize(aContext); - - modelProvider = new ModelProviderBase() { - { - setContextObject(CogrooLemmatizer.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - - String language = props.getProperty(LANGUAGE); - - if (!"pt".equals(language)) { - throw new IOException("The language code '" + language - + "' is not supported by LanguageTool."); - } - - ComponentFactory factory = ComponentFactory.create(new Locale("pt", "BR")); - - return factory.createLemmatizer(); - } - }; - } - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - modelProvider.configure(cas); - - // This is actually quite some overhead, because internally Cogroo is just using a - // Morphlogik dictionary which simply takes a token and pos tag and returnes a list of - // lemmata. It would be much more efficient to use the dictionary directly. - - for (Sentence sentence : select(aJCas, Sentence.class)) { - // We set up one CoGrOO document for each sentence. That makes it easier to maintain - // a list of tokens of the sentence, which we later need to attached the lemmata to the - // tokens. - - // Construct the document - Document doc = new DocumentImpl(); - doc.setText(aJCas.getDocumentText()); - - // Extract the sentence and its tokens - org.cogroo.text.Sentence cSent = new SentenceImpl(sentence.getBegin(), - sentence.getEnd(), doc); - List cTokens = new ArrayList(); - List dTokens = selectCovered(Token.class, sentence); - for (Token dTok : dTokens) { - TokenImpl cTok = new TokenImpl(dTok.getBegin() - sentence.getBegin(), - dTok.getEnd() - sentence.getBegin(), dTok.getText()); - cTok.setPOSTag(dTok.getPos().getPosValue()); - cTokens.add(cTok); - } - cSent.setTokens(cTokens); - doc.setSentences(asList(cSent)); - - // Process - modelProvider.getResource().analyze(doc); - - assert cSent.getTokens().size() == dTokens.size(); - - // Convert from CoGrOO to UIMA model - Iterator dTokIt = dTokens.iterator(); - for (org.cogroo.text.Token cTok : cSent.getTokens()) { - // CoGrOO allows storing multiple lemmas per token. DKPro Core only allows one lemma - // per token. We just take the first one here. If we would run the grammar - // checking based on the DKPro Core lemmata, we might miss certain errors for this - // reason. - Token dTok = dTokIt.next(); - String[] lemmas = cTok.getLemmas(); - Lemma l = new Lemma(aJCas, cSent.getStart() + cTok.getStart(), - cSent.getStart() + cTok.getEnd()); - if (lemmas != null && lemmas.length > 0) { - String lemmaString = lemmas[0]; - if (lemmaString == null) { - lemmaString = dTok.getText(); - } - l.setValue(lemmaString); - } - else { - l.setValue(cTok.getLexeme()); - } - l.addToIndexes(); - dTok.setLemma(l); - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizer.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizer.java deleted file mode 100644 index 989aebccce..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizer.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.apache.uima.fit.util.JCasUtil.selectCovered; - -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.config.Analyzers; -import org.cogroo.text.Document; -import org.cogroo.text.impl.DocumentImpl; -import org.cogroo.text.impl.SentenceImpl; -import org.cogroo.text.impl.TokenImpl; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.CasConfigurableProviderBase; -import org.dkpro.core.api.resources.ModelProviderBase; - -import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * Tokenizer and sentence splitter using CoGrOO. - */ -@Component(OperationType.NAMED_ENTITITY_RECOGNIZER) -@ResourceMetaData(name = "CoGrOO Named Entity Recognizer") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability( - inputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" }, - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" }) -public class CogrooNamedEntityRecognizer - extends JCasAnnotator_ImplBase -{ - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - protected String language; - - private CasConfigurableProviderBase modelProvider; - - @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException - { - super.initialize(aContext); - - modelProvider = new ModelProviderBase() { - { - setContextObject(CogrooNamedEntityRecognizer.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - - String language = props.getProperty(LANGUAGE); - - return ComponentFactory.create(Locale.forLanguageTag(language)).createNameFinder(); - } - }; - } - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - modelProvider.configure(cas); - - // This is actually quite some overhead, because internally Cogroo is just using the - // OpenNLP namefinder which simply takes a string array and returns and arrays of spans... - // It would be much more efficient to use the model directly. - - // Convert from UIMA to Cogroo model - Document doc = new DocumentImpl(); - doc.setText(aJCas.getDocumentText()); - List sentences = new ArrayList(); - for (Sentence sentence : select(aJCas, Sentence.class)) { - org.cogroo.text.Sentence s = new SentenceImpl(sentence.getBegin(), sentence.getEnd(), - doc); - List tokens = new ArrayList(); - for (Token token : selectCovered(Token.class, sentence)) { - tokens.add(new TokenImpl(token.getBegin() - sentence.getBegin(), - token.getEnd() - sentence.getBegin(), token.getCoveredText())); - } - s.setTokens(tokens); - sentences.add(s); - } - doc.setSentences(sentences); - - // Process - modelProvider.getResource().analyze(doc); - - // Convert from Cogroo to UIMA model - for (org.cogroo.text.Sentence s : doc.getSentences()) { - for (org.cogroo.text.Token t : s.getTokens()) { - if ("P".equals(t.getAdditionalContext(Analyzers.NAME_FINDER))) { - NamedEntity ne = new NamedEntity(aJCas, s.getStart() + t.getStart(), - s.getStart() + t.getEnd()); - ne.setValue("P"); - ne.addToIndexes(); - } - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooPosTagger.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooPosTagger.java deleted file mode 100644 index f86bae38ce..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooPosTagger.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static java.util.Arrays.asList; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.apache.uima.fit.util.JCasUtil.selectCovered; -import static org.dkpro.core.api.parameter.ComponentParameters.DEFAULT_MAPPING_ENABLED; -import static org.dkpro.core.api.resources.MappingProviderFactory.createPosMappingProvider; - -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.Type; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.text.Document; -import org.cogroo.text.impl.DocumentImpl; -import org.cogroo.text.impl.SentenceImpl; -import org.cogroo.text.impl.TokenImpl; -import org.dkpro.core.api.lexmorph.pos.POSUtils; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.CasConfigurableProviderBase; -import org.dkpro.core.api.resources.MappingProvider; -import org.dkpro.core.api.resources.ModelProviderBase; - -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * POS-tagger using CoGrOO. - */ -@Component(OperationType.PART_OF_SPEECH_TAGGER) -@ResourceMetaData(name = "CoGrOO POS-Tagger") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" }) -public class CogrooPosTagger - extends JCasAnnotator_ImplBase -{ - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - protected String language; - - /** - * Enable/disable type mapping. - */ - public static final String PARAM_MAPPING_ENABLED = ComponentParameters.PARAM_MAPPING_ENABLED; - @ConfigurationParameter(name = PARAM_MAPPING_ENABLED, defaultValue = DEFAULT_MAPPING_ENABLED) - protected boolean mappingEnabled; - - /** - * Load the part-of-speech tag to UIMA type mapping from this location instead of locating the - * mapping automatically. - */ - public static final String PARAM_POS_MAPPING_LOCATION = ComponentParameters.PARAM_POS_MAPPING_LOCATION; - @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false) - protected String posMappingLocation; - - private CasConfigurableProviderBase modelProvider; - private MappingProvider mappingProvider; - - @Override - public void initialize(UimaContext aContext) throws ResourceInitializationException - { - super.initialize(aContext); - - modelProvider = new ModelProviderBase() - { - { - setContextObject(CogrooPosTagger.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) throws IOException - { - Properties props = getAggregatedProperties(); - - String language = props.getProperty(LANGUAGE); - - if (!"pt".equals(language)) { - throw new IOException("The language code '" + language - + "' is not supported by LanguageTool."); - } - - ComponentFactory factory = ComponentFactory.create(new Locale("pt", "BR")); - return factory.createPOSTagger(); - } - }; - - mappingProvider = createPosMappingProvider(this, posMappingLocation, "bosque", language); - } - - @Override - public void process(JCas aJCas) throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - modelProvider.configure(cas); - mappingProvider.configure(cas); - - // This is actually quite some overhead, because internally Cogroo is just using a - // Morphlogik dictionary which simply takes a token and pos tag and returnes a list of - // lemmata. It would be much more efficient to use the dictionary directly. - - for (Sentence sentence : select(aJCas, Sentence.class)) { - // We set up one CoGrOO document for each sentence. That makes it easier to maintain - // a list of tokens of the sentence, which we later need to attached the lemmata to the - // tokens. - - // Construct the document - Document doc = new DocumentImpl(); - doc.setText(aJCas.getDocumentText()); - - // Extract the sentence and its tokens - org.cogroo.text.Sentence cSent = new SentenceImpl(sentence.getBegin(), - sentence.getEnd(), doc); - List cTokens = new ArrayList(); - List dTokens = selectCovered(Token.class, sentence); - for (Token dTok : dTokens) { - TokenImpl cTok = new TokenImpl(dTok.getBegin() - sentence.getBegin(), - dTok.getEnd() - sentence.getBegin(), dTok.getText()); - cTokens.add(cTok); - } - cSent.setTokens(cTokens); - doc.setSentences(asList(cSent)); - - // Process - modelProvider.getResource().analyze(doc); - - assert cSent.getTokens().size() == dTokens.size(); - - // Convert from CoGrOO to UIMA model - Iterator dTokIt = dTokens.iterator(); - for (org.cogroo.text.Token cTok : cSent.getTokens()) { - // CoGrOO allows storing multiple lemmas per token. DKPro Core only allows one lemma - // per token. We just take the first one here. If we would run the grammar - // checking based on the DKPro Core lemmata, we might miss certain errors for this - // reason. - Token dTok = dTokIt.next(); - - Type posTag = mappingProvider.getTagType(cTok.getPOSTag()); - POS posAnno = (POS) cas.createAnnotation(posTag, cSent.getStart() + cTok.getStart(), - cSent.getStart() + cTok.getEnd()); - String tag = cTok.getPOSTag(); - posAnno.setPosValue(tag != null ? tag.intern() : null); - POSUtils.assignCoarseValue(posAnno); - posAnno.addToIndexes(); - dTok.setPos(posAnno); - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooSegmenter.java b/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooSegmenter.java deleted file mode 100644 index 8ce97e6503..0000000000 --- a/dkpro-core-cogroo-asl/src/main/java/org/dkpro/core/cogroo/CogrooSegmenter.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import java.io.IOException; -import java.net.URL; -import java.util.Locale; -import java.util.Properties; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.LanguageCapability; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.text.Document; -import org.cogroo.text.Sentence; -import org.cogroo.text.Token; -import org.cogroo.text.impl.DocumentImpl; -import org.dkpro.core.api.parameter.ComponentParameters; -import org.dkpro.core.api.resources.CasConfigurableProviderBase; -import org.dkpro.core.api.resources.ModelProviderBase; -import org.dkpro.core.api.segmentation.SegmenterBase; - -import eu.openminted.share.annotations.api.DocumentationResource; - -/** - * Tokenizer and sentence splitter using CoGrOO. - */ -@ResourceMetaData(name = "CoGrOO Segmenter") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@LanguageCapability("pt") -@TypeCapability( - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" }) -public class CogrooSegmenter - extends SegmenterBase -{ - /** - * Use this language instead of the document language to resolve the model. - */ - public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE; - @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false) - protected String language; - - private CasConfigurableProviderBase sentenceModelProvider; - private CasConfigurableProviderBase tokenModelProvider; - - @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException - { - super.initialize(aContext); - - sentenceModelProvider = new ModelProviderBase() { - { - setContextObject(CogrooSegmenter.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - String language = props.getProperty(LANGUAGE); - - return ComponentFactory.create(Locale.forLanguageTag(language)) - .createSentenceDetector(); - } - }; - - tokenModelProvider = new ModelProviderBase() { - { - setContextObject(CogrooSegmenter.this); - - setDefault(LOCATION, NOT_REQUIRED); - setOverride(LANGUAGE, language); - } - - @Override - protected Analyzer produceResource(URL aUrl) - throws IOException - { - Properties props = getAggregatedProperties(); - String language = props.getProperty(LANGUAGE); - - return ComponentFactory.create(Locale.forLanguageTag(language)) - .createTokenizer(); - } - }; - } - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - sentenceModelProvider.configure(cas); - tokenModelProvider.configure(cas); - - super.process(aJCas); - } - - @Override - protected void process(JCas aJCas, String aText, int aZoneBegin) - throws AnalysisEngineProcessException - { - Document doc = new DocumentImpl(); - doc.setText(aText); - - sentenceModelProvider.getResource().analyze(doc); - tokenModelProvider.getResource().analyze(doc); - - for (Sentence s : doc.getSentences()) { - createSentence(aJCas, s.getStart() + aZoneBegin, s.getEnd() + aZoneBegin); - for (Token t : s.getTokens()) { - createToken(aJCas, t.getStart() + s.getStart() + aZoneBegin, - t.getEnd() + s.getStart() + aZoneBegin); - } - } - } -} diff --git a/dkpro-core-cogroo-asl/src/scripts/build.xml b/dkpro-core-cogroo-asl/src/scripts/build.xml deleted file mode 100644 index b5e1fdcc2b..0000000000 --- a/dkpro-core-cogroo-asl/src/scripts/build.xml +++ /dev/null @@ -1,146 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooCheckerTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooCheckerTest.java deleted file mode 100644 index 6d23d700fc..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooCheckerTest.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.dkpro.core.testing.AssertAnnotations.assertAnomaly; - -import org.apache.uima.UIMAException; -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.TestRunner; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.GrammarAnomaly; - -public class CogrooCheckerTest -{ - @Test - public void test() - throws Exception - { - JCas jcas = runTest("pt", - "Fomos levados à crer que os menino são burro de doer. As menina chegaram."); - - String[] anomalies = { - "[ 14, 15] GrammarAnomaly (Não acontece crase antes de verbo.)", - "[ 25, 34] GrammarAnomaly (Os artigos concordam com o substantivo a que se referem.)", - "[ 54, 63] GrammarAnomaly (Os artigos concordam com o substantivo a que se referem.)", - "[ 64, 72] GrammarAnomaly (Verificou-se erro de concordância entre o sujeito e o verbo.)" - }; - - assertAnomaly(anomalies, select(jcas, GrammarAnomaly.class)); - } - - private JCas runTest(String aLanguage, String aText) - throws UIMAException - { - AnalysisEngineDescription checker = createEngineDescription(CogrooChecker.class); - - return TestRunner.runTest(checker, aLanguage, aText); - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooFeaturizerTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooFeaturizerTest.java deleted file mode 100644 index 786d072dea..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooFeaturizerTest.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; -import static org.apache.uima.fit.util.JCasUtil.select; - -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.AssertAnnotations; -import org.dkpro.core.testing.TestRunner; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures; - -public class CogrooFeaturizerTest -{ - @Test - public void testPortuguese() throws Exception - { - runTest("pt", "Este é um teste . ", new String[] { - "[ 0, 4] - - - - - - - - - - - - - - - - - Este (M=S)", - "[ 5, 6] - - - - - - - - - - - - - - - - - é (PR=3S=IND)", - "[ 7, 9] - - - - - - - - - - - - - - - - - um (M=S)", - "[ 10, 15] - - - - - - - - - - - - - - - - - teste (M=S)", - "[ 16, 17] - - - - - - - - - - - - - - - - - . (-)" - }); - - runTest("pt", "Uma rede neural .", new String[] { - "[ 0, 3] - - - - - - - - - - - - - - - - - Uma (F=S)", - "[ 4, 8] - - - - - - - - - - - - - - - - - rede (F=S)", - "[ 9, 15] - - - - - - - - - - - - - - - - - neural (F=S)", - "[ 16, 17] - - - - - - - - - - - - - - - - - . (-)" - }); - - runTest("pt", "John está comprando laranjas .", new String[] { - "[ 0, 4] - - - - - - - - - - - - - - - - - John (M=S)", - "[ 5, 9] - - - - - - - - - - - - - - - - - está (PR=3S=IND)", - "[ 10, 19] - - - - - - - - - - - - - - - - - comprando (-)", - "[ 20, 28] - - - - - - - - - - - - - - - - - laranjas (F=P)", - "[ 29, 30] - - - - - - - - - - - - - - - - - . (-)" - }); - } - - private void runTest(String language, String testDocument, String[] aFeatures) throws Exception - { - AnalysisEngineDescription engine = createEngineDescription( - createEngineDescription(CogrooPosTagger.class), - createEngineDescription(CogrooFeaturizer.class)); - - JCas jcas = TestRunner.runTest(engine, language, testDocument); - - AssertAnnotations.assertMorph(aFeatures, select(jcas, MorphologicalFeatures.class)); - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooLemmatizerTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooLemmatizerTest.java deleted file mode 100644 index e2cbf3586c..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooLemmatizerTest.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; -import static org.apache.uima.fit.util.JCasUtil.select; - -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.AssertAnnotations; -import org.dkpro.core.testing.TestRunner; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; - -public class CogrooLemmatizerTest -{ - @Test - public void testPortuguese() throws Exception - { - runTest("pt", "Este é um teste . ", new String[] { "este", "ser", "um", "teste", "." }); - - runTest("pt", "Uma rede neural .", new String[] { "um", "rede", "neural", "." }); - - runTest("pt", "John está comprando laranjas .", - new String[] { "John", "estar", "comprar", "laranja", "." }); - } - - private void runTest(String language, String testDocument, String[] aLemma) throws Exception - { - AnalysisEngineDescription engine = createEngineDescription( - createEngineDescription(CogrooPosTagger.class), - createEngineDescription(CogrooLemmatizer.class)); - - JCas jcas = TestRunner.runTest(engine, language, testDocument); - - AssertAnnotations.assertLemma(aLemma, select(jcas, Lemma.class)); - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizerTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizerTest.java deleted file mode 100644 index 66c228d3bb..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooNamedEntityRecognizerTest.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.dkpro.core.testing.AssertAnnotations.assertNamedEntity; - -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.TestRunner; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; - -public class CogrooNamedEntityRecognizerTest -{ - /** - * The CogRoo name finder is a bit strange because it appears to find only multi-word named - * entities. It also doesn't classify them. - * @throws Exception if a problem occurs. - */ - @Test - public void testPortuguese() - throws Exception - { - JCas jcas = runTest("pt-BR", "Maria Gomez está viva. Fernando Pessoa morreu ."); - - String[] namedEntities = new String[] { - "[ 0, 11]NamedEntity(P) (Maria Gomez)", - "[ 23, 38]NamedEntity(P) (Fernando Pessoa)" }; - - assertNamedEntity(namedEntities, select(jcas, NamedEntity.class)); - } - - private JCas runTest(String aLanguage, String aDocument) - throws Exception - { - AnalysisEngineDescription desc = createEngineDescription( - CogrooNamedEntityRecognizer.class - //CogrooNameFinder.PARAM_PRINT_TAGSET, true - ); - - return TestRunner.runTest(desc, aLanguage, aDocument); - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooPosTaggerTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooPosTaggerTest.java deleted file mode 100644 index b989e2d7a4..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooPosTaggerTest.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; -import static org.apache.uima.fit.util.JCasUtil.select; - -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.AssertAnnotations; -import org.dkpro.core.testing.TestRunner; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; - -public class CogrooPosTaggerTest -{ - @Test - public void testPortuguese() - throws Exception - { - JCas jcas = runTest("pt", null, "Este é um teste .", - new String[] { "pron-det", "v-fin", "art", "n", "." }, - new String[] { "POS_PRON", "POS_VERB", "POS_DET", "POS_NOUN", "POS" }); - -// String[] posTags = new String[] { "?", "adj", "adv", "art", "conj-c", "conj-s", "ec", "in", -// "n", "num", "pp", "pron-det", "pron-indp", "pron-pers", "prop", "prp", "punc", -// "v-fin", "v-ger", "v-inf", "v-pcp", "vp" }; -// -// AssertAnnotations.assertTagset(POS.class, "bosque", posTags, jcas); - } - - private JCas runTest(String language, String variant, String testDocument, String[] tags, - String[] tagClasses) - throws Exception - { - AnalysisEngine engine = createEngine(CogrooPosTagger.class); - - JCas jcas = TestRunner.runTest(engine, language, testDocument); - - AssertAnnotations.assertPOS(tagClasses, tags, select(jcas, POS.class)); - - return jcas; - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooSegmenterTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooSegmenterTest.java deleted file mode 100644 index 1ed800d9c7..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/CogrooSegmenterTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; -import static org.apache.uima.fit.util.JCasUtil.select; -import static org.dkpro.core.testing.AssertAnnotations.assertSentence; -import static org.dkpro.core.testing.AssertAnnotations.assertToken; - -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.harness.SegmenterHarness; -import org.junit.jupiter.api.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; - -public class CogrooSegmenterTest -{ - @Test - public void testPortuguese() throws Exception - { - final String text = "Este é um teste. E mais um."; - final String[] sentences = new String[] { "Este é um teste.", "E mais um." }; - final String[] tokens = new String[] { "Este", "é", "um", "teste", ".", "E", "mais", - "um", "." }; - - JCas jcas = runTest("pt-BR", text); - - assertSentence(sentences, select(jcas, Sentence.class)); - assertToken(tokens, select(jcas, Token.class)); - } - - private JCas runTest(String aLanguage, String aDocument) - throws Exception - { - AnalysisEngine engine = createEngine(CogrooSegmenter.class); - - JCas jcas = engine.newJCas(); - jcas.setDocumentText(aDocument); - jcas.setDocumentLanguage(aLanguage); - - engine.process(jcas); - - return jcas; - } - - @Test - public void testZoning() throws Exception - { - SegmenterHarness.testZoning(CogrooSegmenter.class, "pt-BR"); - } -} diff --git a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/SimpleTest.java b/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/SimpleTest.java deleted file mode 100644 index d0856537c5..0000000000 --- a/dkpro-core-cogroo-asl/src/test/java/org/dkpro/core/cogroo/SimpleTest.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.cogroo; - -import java.util.Locale; - -import org.cogroo.analyzer.Analyzer; -import org.cogroo.analyzer.ComponentFactory; -import org.cogroo.text.Document; -import org.cogroo.text.impl.DocumentImpl; -import org.junit.jupiter.api.Test; - -public class SimpleTest -{ - @Test - public void lala() - { - ComponentFactory factory = ComponentFactory.create(Locale.forLanguageTag("pt-BR")); - Analyzer sentenceDetector = factory.createSentenceDetector(); - Analyzer tokenizer = factory.createTokenizer(); - Analyzer nameFinder = factory.createNameFinder(); - Analyzer contractionFinder = factory.createContractionFinder(); - Analyzer posTagger = factory.createPOSTagger(); - Analyzer featurizer = factory.createFeaturizer(); - Analyzer lemmatizer = factory.createLemmatizer(); - Analyzer chunker = factory.createChunker(); - Analyzer headFinder = factory.createHeadFinder(); - Analyzer shallowParser = factory.createShallowParser(); - - Document doc = new DocumentImpl(); - doc.setText("Este é um test. Queria saber mais."); - - sentenceDetector.analyze(doc);; - tokenizer.analyze(doc); - nameFinder.analyze(doc); - contractionFinder.analyze(doc); - posTagger.analyze(doc); - lemmatizer.analyze(doc); - featurizer.analyze(doc); - chunker.analyze(doc); - headFinder.analyze(doc); - shallowParser.analyze(doc); - - System.out.println(doc.getSentences()); - } -}