From 3e1d1c5453becb44393922759b4cc2f582237fdb Mon Sep 17 00:00:00 2001 From: Richard Zowalla Date: Tue, 29 Oct 2024 13:57:07 +0100 Subject: [PATCH] OPENNLP-1634 - Move OpenNLP Brat Annotator back to Sandbox --- README.md | 1 - opennlp-brat-annotator/pom.xml | 117 --------------- .../src/main/bin/brat-annotation-service | 56 ------- .../src/main/bin/brat-annotation-service.bat | 51 ------- .../opennlp/bratann/NameFinderAnnService.java | 102 ------------- .../opennlp/bratann/NameFinderResource.java | 138 ------------------ opennlp-distr/pom.xml | 6 +- opennlp-distr/src/main/assembly/bin.xml | 14 -- pom.xml | 11 +- 9 files changed, 2 insertions(+), 494 deletions(-) delete mode 100644 opennlp-brat-annotator/pom.xml delete mode 100755 opennlp-brat-annotator/src/main/bin/brat-annotation-service delete mode 100755 opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat delete mode 100644 opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java delete mode 100644 opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java diff --git a/README.md b/README.md index 495b2f2a0..25d382064 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,6 @@ Currently, the library has different packages: * `opennlp-tools` : The core toolkit. * `opennlp-tools-models` : A set of classes to load [OpenNLP models](https://github.com/apache/opennlp-models) from the classpath. * `opennlp-uima` : A set of [Apache UIMA](https://uima.apache.org) annotators. -* `opennlp-brat-annotator` : A set of annotators for [BRAT](http://brat.nlplab.org/) * `opennlp-morfologik-addon` : An addon for Morfologik * `opennlp-dl` : OpenNLP interface implementations for ONNX models using the `onnxruntime` dependency. * `opennlp-dl-gpu` : Replaces `onnxruntime` with the `onnxruntime_gpu` dependency to support GPU acceleration. diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml deleted file mode 100644 index 58426af53..000000000 --- a/opennlp-brat-annotator/pom.xml +++ /dev/null @@ -1,117 +0,0 @@ - - - - - - 4.0.0 - - - org.apache.opennlp - opennlp - 2.4.1-SNAPSHOT - ../pom.xml - - - opennlp-brat-annotator - jar - - Apache OpenNLP Brat Annotator - - - UTF-8 - - - - - org.slf4j - slf4j-api - - - - org.glassfish.jersey.containers - jersey-container-grizzly2-http - ${jersey.version} - - - - org.glassfish.jersey.media - jersey-media-json-jackson - ${jersey.version} - runtime - - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - runtime - - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - runtime - - - - com.fasterxml.jackson.module - jackson-module-jaxb-annotations - ${jackson.version} - runtime - - - - org.apache.opennlp - opennlp-tools - - - - org.junit.jupiter - junit-jupiter-api - test - - - - org.junit.jupiter - junit-jupiter-engine - test - - - - org.slf4j - slf4j-simple - test - - - - - - maven-assembly-plugin - - - jar-with-dependencies - - - - - make-assembly - package - - single - - - - - - - diff --git a/opennlp-brat-annotator/src/main/bin/brat-annotation-service b/opennlp-brat-annotator/src/main/bin/brat-annotation-service deleted file mode 100755 index eac956681..000000000 --- a/opennlp-brat-annotator/src/main/bin/brat-annotation-service +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Note: Do not output anything in this script file, any output -# may be inadvertantly placed in any output files if -# output redirection is used. - -# determine OPENNLP_HOME - $0 may be a symlink to OpenNLP's home -PRG="$0" - -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG="`dirname "$PRG"`/$link" - fi -done - -saveddir=`pwd` - -OPENNLP_HOME=`dirname "$PRG"`/.. - -# make it fully qualified -OPENNLP_HOME=`cd "$OPENNLP_HOME" && pwd` - -cd "$saveddir" - -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - JAVACMD="$JAVA_HOME/bin/java" - else - JAVACMD="`which java`" - fi -fi - -CLASSPATH=$(echo $OPENNLP_HOME/lib/*.jar | tr ' ' ':') - -$JAVACMD -Xmx1024m -Dlog4j.configurationFile="$OPENNLP_HOME/conf/log4j2.xml" -cp "$CLASSPATH" opennlp.bratann.NameFinderAnnService $@ diff --git a/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat b/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat deleted file mode 100755 index 289248b4b..000000000 --- a/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat +++ /dev/null @@ -1,51 +0,0 @@ -@ECHO off - -REM # Licensed to the Apache Software Foundation (ASF) under one -REM # or more contributor license agreements. See the NOTICE file -REM # distributed with this work for additional information -REM # regarding copyright ownership. The ASF licenses this file -REM # to you under the Apache License, Version 2.0 (the -REM # "License"); you may not use this file except in compliance -REM # with the License. You may obtain a copy of the License at -REM # -REM # http://www.apache.org/licenses/LICENSE-2.0 -REM # -REM # Unless required by applicable law or agreed to in writing, -REM # software distributed under the License is distributed on an -REM # # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM # KIND, either express or implied. See the License for the -REM # specific language governing permissions and limitations -REM # under the License. - -REM # Note: Do not output anything in this script file, any output -REM # may be inadvertantly placed in any output files if -REM # output redirection is used. -SETLOCAL - -IF "%JAVA_CMD%" == "" ( - IF "%JAVA_HOME%" == "" ( - SET JAVA_CMD=java - ) ELSE ( - REM # Keep JAVA_HOME to short-name without spaces - FOR %%A IN ("%JAVA_HOME%") DO SET JAVA_CMD=%%~sfA\bin\java - ) -) - -REM # Should work with Windows XP and greater. If not, specify the path to where it is installed. -IF "%OPENNLP_HOME%" == "" ( - SET OPENNLP_HOME=%~sp0.. -) ELSE ( - REM # Keep OPENNLP_HOME to short-name without spaces - FOR %%A IN ("%OPENNLP_HOME%") DO SET OPENNLP_HOME=%%~sfA -) -setLocal EnableDelayedExpansion -set CLASSPATH=" - -FOR %%A IN ("%OPENNLP_HOME%\lib\*.jar") DO ( - set CLASSPATH=!CLASSPATH!;%%A -) -set CLASSPATH=!CLASSPATH!" - -%JAVA_CMD% -Xmx1024m "-Dlog4j.configurationFile=%OPENNLP_HOME%\conf\log4j2.xml" -cp %CLASSPATH% opennlp.bratann.NameFinderAnnService %* - -ENDLOCAL \ No newline at end of file diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java deleted file mode 100644 index 1735cb888..000000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.io.File; -import java.net.URI; -import java.util.Arrays; -import java.util.List; - -import jakarta.ws.rs.core.UriBuilder; -import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory; -import org.glassfish.jersey.server.ResourceConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import opennlp.tools.namefind.NameFinderME; -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.namefind.TokenNameFinderModel; -import opennlp.tools.sentdetect.NewlineSentenceDetector; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.sentdetect.SentenceDetectorME; -import opennlp.tools.sentdetect.SentenceModel; -import opennlp.tools.tokenize.SimpleTokenizer; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.tokenize.TokenizerME; -import opennlp.tools.tokenize.TokenizerModel; -import opennlp.tools.tokenize.WhitespaceTokenizer; - -public class NameFinderAnnService { - - private static final Logger LOG = LoggerFactory.getLogger(NameFinderAnnService.class); - static SentenceDetector sentenceDetector = new NewlineSentenceDetector(); - static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE; - static TokenNameFinder[] nameFinders; - - public static void main(String[] args) throws Exception { - - if (args.length == 0) { - LOG.info("Usage:"); - LOG.info("[NameFinderAnnService -serverPort port] [-tokenizerModel file] " - + "[-ruleBasedTokenizer whitespace|simple] " - + "[-sentenceDetectorModel file] namefinderFile|nameFinderURI"); - return; - } - - List argList = Arrays.asList(args); - - int serverPort = 8080; - int serverPortIndex = argList.indexOf("-serverPort") + 1; - - if (serverPortIndex > 0 && serverPortIndex < args.length) { - serverPort = Integer.parseInt(args[serverPortIndex]); - } - - int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel") + 1; - if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) { - sentenceDetector = new SentenceDetectorME( - new SentenceModel(new File(args[sentenceModelIndex]))); - } - - int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1; - - if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) { - if ("whitespace".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = WhitespaceTokenizer.INSTANCE; - } else if ("simple".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = SimpleTokenizer.INSTANCE; - } else { - LOG.error("unknown tokenizer: {}", args[ruleBasedTokenizerIndex]); - return; - } - } - - int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1; - if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) { - tokenizer = new TokenizerME( - new TokenizerModel(new File(args[tokenizerModelIndex]))); - } - - nameFinders = new TokenNameFinder[] {new NameFinderME( - new TokenNameFinderModel(new File(args[args.length - 1])))}; - - URI baseUri = UriBuilder.fromUri("http://localhost/").port(serverPort).build(); - ResourceConfig config = new ResourceConfig(NameFinderResource.class); - GrizzlyHttpServerFactory.createHttpServer(baseUri, config); - } -} diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java deleted file mode 100644 index f824c18ce..000000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.core.MediaType; - -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.util.Span; - -@Path("/ner") -public class NameFinderResource { - - private final SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector; - private final Tokenizer tokenizer = NameFinderAnnService.tokenizer; - private final TokenNameFinder[] nameFinders = NameFinderAnnService.nameFinders; - - private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, int endOffset) { - for (int i = beginOffset; i < endOffset; i++) { - if (!Character.isSpaceChar(s.charAt(i))) { - return i; - } - } - return -1; - } - - @POST - @Consumes(MediaType.TEXT_PLAIN) - @Produces(MediaType.APPLICATION_JSON) - public Map findNames(@QueryParam("model") String modelName, String text) { - Span[] sentenceSpans = sentDetect.sentPosDetect(text); - Map map = new HashMap<>(); - - int indexCounter = 0; - - for (Span sentenceSpan : sentenceSpans) { - - String sentenceText = sentenceSpan.getCoveredText(text).toString(); - - // offset of sentence gets lost here! - Span[] tokenSpans = tokenizer.tokenizePos(sentenceText); - - String[] tokens = Span.spansToStrings(tokenSpans, sentenceText); - - for (TokenNameFinder nameFinder : nameFinders) { - Span[] names = nameFinder.find(tokens); - - for (Span name : names) { - - int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart(); - int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart(); - - // create a list of new line indexes - List newLineIndexes = new ArrayList<>(); - - // TODO: Code needs to handle case that there are multiple new lines - // in a row - - boolean inNewLineSequence = false; - for (int ci = beginOffset; ci < endOffset; ci++) { - if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') { - if (!inNewLineSequence) { - newLineIndexes.add(ci); - } - inNewLineSequence = true; - } else { - inNewLineSequence = false; - } - } - - List textSegments = new ArrayList<>(); - List spanSegments = new ArrayList<>(); - - int segmentBegin = beginOffset; - - for (int newLineOffset : newLineIndexes) { - // create segment from begin to offset - textSegments.add(text.substring(segmentBegin, newLineOffset)); - spanSegments.add(new int[] {segmentBegin, newLineOffset}); - - segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1, - endOffset); - - if (segmentBegin == -1) { - break; - } - } - - // create left over segment - if (segmentBegin != -1) { - textSegments.add(text.substring(segmentBegin, endOffset)); - spanSegments.add(new int[] {segmentBegin, endOffset}); - } - - NameAnn ann = new NameAnn(); - ann.texts = textSegments.toArray(new String[0]); - ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); - ann.type = name.getType(); - - map.put(Integer.toString(indexCounter++), ann); - } - } - } - return map; - } - - public static class NameAnn { - public int[][] offsets; - public String[] texts; - public String type; - } -} diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml index cb5bfcb16..9d87ca919 100644 --- a/opennlp-distr/pom.xml +++ b/opennlp-distr/pom.xml @@ -50,11 +50,7 @@ org.apache.opennlp opennlp-morfologik-addon - - org.apache.opennlp - opennlp-brat-annotator - - + org.slf4j slf4j-api diff --git a/opennlp-distr/src/main/assembly/bin.xml b/opennlp-distr/src/main/assembly/bin.xml index 1a92bfd7c..1c8d7d8bc 100644 --- a/opennlp-distr/src/main/assembly/bin.xml +++ b/opennlp-distr/src/main/assembly/bin.xml @@ -100,13 +100,6 @@ bin - - ../opennlp-brat-annotator/src/main/bin - 755 - 755 - bin - - ../opennlp-tools/lang 644 @@ -128,13 +121,6 @@ docs/apidocs/opennlp-tools - - ../opennlp-brat-annotator/target/apidocs - 644 - 755 - docs/apidocs/opennlp-brat-annotator - - ../opennlp-morfologik-addon/target/apidocs 644 diff --git a/pom.xml b/pom.xml index 871442cb2..d9fd74769 100644 --- a/pom.xml +++ b/pom.xml @@ -147,13 +147,7 @@ ${project.version} - - opennlp-brat-annotator - ${project.groupId} - ${project.version} - - - + opennlp-morfologik-addon ${project.groupId} ${project.version} @@ -168,8 +162,6 @@ ${java.version} ${java.version} 3.3.9 - 2.18.1 - 3.1.9 5.11.3 2.0.2 3.5.0 @@ -545,7 +537,6 @@ opennlp-tools opennlp-uima - opennlp-brat-annotator opennlp-morfologik-addon opennlp-docs opennlp-distr