From 631d247df4103e288a6caa44730e4a679a2e862d Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 4 Jan 2024 21:42:00 +0100 Subject: [PATCH] #4292 - ollama-based recommender - Allow accessing annotations from the CAS in the prompt template - Added some documentation --- .../META-INF/asciidoc/user-guide.adoc | 2 + .../OllamaRecommenderTraitsEditor.properties | 2 +- .../ollama/jinjava/AnnotationWrapper.java | 83 +++++++++++++++++++ .../imls/ollama/jinjava/CasWrapper.java | 57 +++++++++++++ .../prompt/PerAnnotationContextGenerator.java | 2 + .../prompt/PerDocumentContextGenerator.java | 2 + .../prompt/PerSentenceContextGenerator.java | 2 + .../ollama/prompt/PromptContextGenerator.java | 1 + .../projects_recommendation_ollama.adoc | 45 ++++++++++ .../recommendation/imls/ollama/presets.yaml | 14 +++- .../imls/ollama/jinjava/CasWrapperTest.java | 67 +++++++++++++++ ...umentMetadataAnnotationSelectionPanel.html | 2 +- 12 files changed, 276 insertions(+), 3 deletions(-) create mode 100644 inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/AnnotationWrapper.java create mode 100644 inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapper.java create mode 100644 inception/inception-imls-ollama/src/main/resources/META-INF/asciidoc/user-guide/projects_recommendation_ollama.adoc create mode 100644 inception/inception-imls-ollama/src/test/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapperTest.java diff --git a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc index e9a63b9530a..b44bc9451f1 100644 --- a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc +++ b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc @@ -145,6 +145,8 @@ 
include::{include-dir}projects_recommendation_string_relation.adoc[leveloffset=+ include::{include-dir}projects_recommendation_opennlp.adoc[leveloffset=+2] +include::{include-dir}projects_recommendation_ollama.adoc[leveloffset=+2] + include::{include-dir}projects_recommendation_conceptlinker.adoc[leveloffset=+2] include::{include-dir}projects_recommendation_external.adoc[leveloffset=+2] diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/OllamaRecommenderTraitsEditor.properties b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/OllamaRecommenderTraitsEditor.properties index bb67f2e8b72..5401d795f90 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/OllamaRecommenderTraitsEditor.properties +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/OllamaRecommenderTraitsEditor.properties @@ -20,7 +20,7 @@ raw=Raw prompt preset=Preset options=Advanced options -promptingMode=Processing mode +promptingMode=Prompting mode PromptingMode.PER_ANNOTATION=Per annotation PromptingMode.PER_SENTENCE=Per sentence PromptingMode.PER_DOCUMENT=Per document diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/AnnotationWrapper.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/AnnotationWrapper.java new file mode 100644 index 00000000000..46e49b1cb8f --- /dev/null +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/AnnotationWrapper.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; + +import java.util.AbstractMap; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Feature; +import org.apache.uima.jcas.tcas.Annotation; + +public class AnnotationWrapper + extends AbstractMap +{ + private static final String COVERED_TEXT = "$coveredText"; + + private static final Set FEATURE_BLACKLIST = Set.of(CAS.FEATURE_BASE_NAME_SOFA); + + private final Annotation annotation; + + public AnnotationWrapper(Annotation aAnnotation) + { + annotation = aAnnotation; + } + + @Override + public Object get(Object aKey) + { + if (aKey instanceof String key) { + if (COVERED_TEXT.equals(key)) { + return annotation.getCoveredText(); + } + + var feature = annotation.getType().getFeatureByBaseName(key); + if (feature != null) { + return annotation.getFeatureValueAsString(feature); + } + } + + return null; + } + + @Override + public Set keySet() + { + var features = annotation.getType().getFeatures().stream() // + .map(Feature::getShortName) // + .filter(name -> !FEATURE_BLACKLIST.contains(name)); + + var specials = Stream.of(COVERED_TEXT); + + return Stream.concat(features, specials).collect(Collectors.toSet()); + } + 
+ @Override + public Set> entrySet() + { + return keySet().stream().map(k -> new SimpleEntry<>(k, get(k))).collect(Collectors.toSet()); + } + + @Override + public String toString() + { + return annotation.getCoveredText(); + } +} diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapper.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapper.java new file mode 100644 index 00000000000..dd73b5d33cb --- /dev/null +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapper.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; + +import java.util.List; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Type; +import org.apache.uima.jcas.tcas.Annotation; + +public class CasWrapper +{ + private final CAS cas; + + public CasWrapper(CAS aCas) + { + cas = aCas; + } + + public List select(String aTypeName) + { + var type = getType(aTypeName); + + return cas. 
select(type).map(AnnotationWrapper::new).toList(); + } + + private Type getType(String aName) + { + var type = cas.getTypeSystem().getType(aName); + if (type != null) { + return type; + } + + for (var t : cas.getTypeSystem()) { + if (t.getShortName().equals(aName)) { + return t; + } + } + + return null; + } +} diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerAnnotationContextGenerator.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerAnnotationContextGenerator.java index 3f6f02a3804..0994d8cc0fb 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerAnnotationContextGenerator.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerAnnotationContextGenerator.java @@ -25,6 +25,7 @@ import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine; +import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper; public class PerAnnotationContextGenerator implements PromptContextGenerator @@ -40,6 +41,7 @@ public Stream generate(RecommendationEngine aEngine, CAS aCas, in .map(Sentence::getCoveredText) // .findFirst().orElse(""); var context = new PromptContext(candidate); + context.set(VAR_CAS, new CasWrapper(aCas)); context.set(VAR_TEXT, candidate.getCoveredText()); context.set(VAR_SENTENCE, sentence); return context; diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerDocumentContextGenerator.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerDocumentContextGenerator.java index 43a02497b33..efc1bbf2dbe 100644 --- 
a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerDocumentContextGenerator.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerDocumentContextGenerator.java @@ -22,6 +22,7 @@ import org.apache.uima.cas.CAS; import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine; +import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper; public class PerDocumentContextGenerator implements PromptContextGenerator @@ -33,6 +34,7 @@ public Stream generate(RecommendationEngine aEngine, CAS aCas, in { var candidate = aCas.getDocumentAnnotation(); var context = new PromptContext(candidate); + context.set(VAR_CAS, new CasWrapper(aCas)); context.set(VAR_TEXT, aCas.getDocumentText()); return Stream.of(context); } diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerSentenceContextGenerator.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerSentenceContextGenerator.java index 613326bf6d3..4500ff98e3e 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerSentenceContextGenerator.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PerSentenceContextGenerator.java @@ -26,6 +26,7 @@ import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine; +import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper; public class PerSentenceContextGenerator implements PromptContextGenerator @@ -40,6 +41,7 @@ public Stream generate(RecommendationEngine aEngine, CAS aCas, in return candidates.stream().map(candidate -> { var context = 
new PromptContext(candidate); + context.set(VAR_CAS, new CasWrapper(aCas)); context.set(VAR_TEXT, candidate.getCoveredText()); return context; }); diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PromptContextGenerator.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PromptContextGenerator.java index 84bf5214500..a5e1c65e32b 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PromptContextGenerator.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/prompt/PromptContextGenerator.java @@ -29,6 +29,7 @@ public interface PromptContextGenerator static final String VAR_SENTENCE = "sentence"; static final String VAR_DOCUMENT = "document"; static final String VAR_EXAMPLES = "examples"; + static final String VAR_CAS = "cas"; Stream generate(RecommendationEngine aEngine, CAS aCas, int aBegin, int aEnd); } diff --git a/inception/inception-imls-ollama/src/main/resources/META-INF/asciidoc/user-guide/projects_recommendation_ollama.adoc b/inception/inception-imls-ollama/src/main/resources/META-INF/asciidoc/user-guide/projects_recommendation_ollama.adoc new file mode 100644 index 00000000000..d42b80722e5 --- /dev/null +++ b/inception/inception-imls-ollama/src/main/resources/META-INF/asciidoc/user-guide/projects_recommendation_ollama.adoc @@ -0,0 +1,45 @@ +// Licensed to the Technische Universität Darmstadt under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The Technische Universität Darmstadt +// licenses this file to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
+// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +== Ollama + +==== +CAUTION: Experimental feature. To use this functionality, you need to enable it first by adding `recommender.ollama.enabled=true` to the `settings.properties` file (see the <<admin-guide.adoc#sect_settings, Admin Guide>>). +==== + +This recommender allows obtaining annotation suggestions using large language models (LLMs) supported by link:https://ollama.ai[Ollama]. In order to use it, you first need to install Ollama and run it. + +.Installing and running Ollama on macOS using homebrew +[source,sh] +---- +$ brew install ollama +$ ollama pull mistral +$ ollama serve +---- + +By default, Ollama runs on `http://localhost:11434/` and {product-name} uses this as the default endpoint for communicating with it. If you run Ollama on a different host (e.g. one that has a more powerful GPU) or port, you can adjust this URL in the recommender settings. + +If {product-name} can successfully connect to Ollama, the **model** combo-box will offer all models that are available on the respective endpoint. If you want to use a model that is not listed here, you first need to `ollama pull` it. + +Now you can configure how to generate the prompts that are sent to Ollama and how to interpret its response using the following settings: + +* **Prompting mode:** here you can choose to generate one prompt **per sentence**, **per annotation** or **per document**. +* **Response format:** here you can choose how to read the response from Ollama. The choice is between **default** (i.e. text) and a **JSON** format. +* **Extraction mode:** here you can choose how to interpret the response from Ollama.
The availability of different extraction modes depends on the type of layer for which the recommender is configured. Choose **response as label** e.g. for classification or summarization tasks. It puts the response from the LLM directly into the feature that you configured the recommender to operate on. Choose **Mentions from JSON** (span layer) for information extraction tasks where you ask the LLM e.g. to identify and categorize certain types of entities in the text. +* **Prompt:** Here you can finally define the prompt that is sent to Ollama. The prompt should usually consist of an instruction and a piece of text to which the instruction is to be applied. Depending on the prompting mode, there are different variables that can be used in the prompt. The most important variable is `text` and it corresponds to the sentence text, annotated words or document text, depending on the prompting mode. + +The recommender comes with several example configurations that you can choose from a drop-down field. + diff --git a/inception/inception-imls-ollama/src/main/resources/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/presets.yaml b/inception/inception-imls-ollama/src/main/resources/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/presets.yaml index 6a629f9431a..56de49db874 100644 --- a/inception/inception-imls-ollama/src/main/resources/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/presets.yaml +++ b/inception/inception-imls-ollama/src/main/resources/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/presets.yaml @@ -42,7 +42,7 @@ {{ text }} ``` -- name: Summarize text +- name: Summarize document promptingMode: per-document extractionMode: response-as-label prompt: |- @@ -52,6 +52,18 @@ {{ text }} ``` +- name: Summarize annotated spans + promptingMode: per-document + extractionMode: response-as-label + prompt: |- + Briefly summarize the following text. 
+ + ``` + {% for x in cas.select('custom.Span') %} + {{ x }} + {% endfor %} + ``` + - name: Disambiguate in sentence promptingMode: per-annotation extractionMode: response-as-label diff --git a/inception/inception-imls-ollama/src/test/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapperTest.java b/inception/inception-imls-ollama/src/test/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapperTest.java new file mode 100644 index 00000000000..43a54bcccb6 --- /dev/null +++ b/inception/inception-imls-ollama/src/test/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapperTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; + +import static de.tudarmstadt.ukp.inception.support.uima.AnnotationBuilder.buildAnnotation; +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Map; + +import org.apache.uima.fit.factory.CasFactory; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.hubspot.jinjava.Jinjava; +import com.hubspot.jinjava.JinjavaConfig; + +import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; + +class CasWrapperTest +{ + private Jinjava jinjava; + + @BeforeEach + void setup() + { + var config = new JinjavaConfig(); + jinjava = new Jinjava(config); + } + + @Test + void thatSelectCanAccessAnnotationsFromCas() throws Exception + { + var script = """ + {% for x in cas.select('NamedEntity') %} + {{ x }}{% endfor %}"""; + + var bindings = Map.of("test", "test"); + + var cas = CasFactory.createCas(); + cas.setDocumentText(""" + My name is John McCain. + His name is Mickey."""); + buildAnnotation(cas, NamedEntity.class).on("John McCain").buildAndAddToIndexes(); + buildAnnotation(cas, NamedEntity.class).on("Mickey").buildAndAddToIndexes(); + + jinjava.getGlobalContext().put("cas", new CasWrapper(cas)); + + var result = jinjava.render(script, bindings); + + assertThat(result).contains("John McCain\nMickey"); + } +} diff --git a/inception/inception-layer-docmetadata/src/main/java/de/tudarmstadt/ukp/inception/ui/core/docanno/sidebar/DocumentMetadataAnnotationSelectionPanel.html b/inception/inception-layer-docmetadata/src/main/java/de/tudarmstadt/ukp/inception/ui/core/docanno/sidebar/DocumentMetadataAnnotationSelectionPanel.html index f8b7f2a58f1..f31988c0f7d 100644 --- a/inception/inception-layer-docmetadata/src/main/java/de/tudarmstadt/ukp/inception/ui/core/docanno/sidebar/DocumentMetadataAnnotationSelectionPanel.html +++ 
b/inception/inception-layer-docmetadata/src/main/java/de/tudarmstadt/ukp/inception/ui/core/docanno/sidebar/DocumentMetadataAnnotationSelectionPanel.html @@ -40,7 +40,7 @@
    -
  • +