-
Notifications
You must be signed in to change notification settings - Fork 156
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4427 from inception-project/feature/4292-ollama-b…
…ased-recommender #4292 - ollama-based recommender
- Loading branch information
Showing
12 changed files
with
276 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
...va/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/AnnotationWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* Licensed to the Technische Universität Darmstadt under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The Technische Universität Darmstadt | ||
* licenses this file to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; | ||
|
||
import java.util.AbstractMap; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
import org.apache.uima.cas.CAS; | ||
import org.apache.uima.cas.Feature; | ||
import org.apache.uima.jcas.tcas.Annotation; | ||
|
||
public class AnnotationWrapper | ||
extends AbstractMap<String, Object> | ||
{ | ||
private static final String COVERED_TEXT = "$coveredText"; | ||
|
||
private static final Set<String> FEATURE_BLACKLIST = Set.of(CAS.FEATURE_BASE_NAME_SOFA); | ||
|
||
private final Annotation annotation; | ||
|
||
public AnnotationWrapper(Annotation aAnnotation) | ||
{ | ||
annotation = aAnnotation; | ||
} | ||
|
||
@Override | ||
public Object get(Object aKey) | ||
{ | ||
if (aKey instanceof String key) { | ||
if (COVERED_TEXT.equals(key)) { | ||
return annotation.getCoveredText(); | ||
} | ||
|
||
var feature = annotation.getType().getFeatureByBaseName(key); | ||
if (feature != null) { | ||
return annotation.getFeatureValueAsString(feature); | ||
} | ||
} | ||
|
||
return null; | ||
} | ||
|
||
@Override | ||
public Set<String> keySet() | ||
{ | ||
var features = annotation.getType().getFeatures().stream() // | ||
.map(Feature::getShortName) // | ||
.filter(name -> !FEATURE_BLACKLIST.contains(name)); | ||
|
||
var specials = Stream.of(COVERED_TEXT); | ||
|
||
return Stream.concat(features, specials).collect(Collectors.toSet()); | ||
} | ||
|
||
@Override | ||
public Set<Entry<String, Object>> entrySet() | ||
{ | ||
return keySet().stream().map(k -> new SimpleEntry<>(k, get(k))).collect(Collectors.toSet()); | ||
} | ||
|
||
@Override | ||
public String toString() | ||
{ | ||
return annotation.getCoveredText(); | ||
} | ||
} |
57 changes: 57 additions & 0 deletions
57
...main/java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Licensed to the Technische Universität Darmstadt under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The Technische Universität Darmstadt | ||
* licenses this file to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; | ||
|
||
import java.util.List; | ||
|
||
import org.apache.uima.cas.CAS; | ||
import org.apache.uima.cas.Type; | ||
import org.apache.uima.jcas.tcas.Annotation; | ||
|
||
public class CasWrapper | ||
{ | ||
private final CAS cas; | ||
|
||
public CasWrapper(CAS aCas) | ||
{ | ||
cas = aCas; | ||
} | ||
|
||
public List<AnnotationWrapper> select(String aTypeName) | ||
{ | ||
var type = getType(aTypeName); | ||
|
||
return cas.<Annotation> select(type).map(AnnotationWrapper::new).toList(); | ||
} | ||
|
||
private Type getType(String aName) | ||
{ | ||
var type = cas.getTypeSystem().getType(aName); | ||
if (type != null) { | ||
return type; | ||
} | ||
|
||
for (var t : cas.getTypeSystem()) { | ||
if (t.getShortName().equals(aName)) { | ||
return t; | ||
} | ||
} | ||
|
||
return null; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 45 additions & 0 deletions
45
...main/resources/META-INF/asciidoc/user-guide/projects_recommendation_ollama.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// Licensed to the Technische Universität Darmstadt under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The Technische Universität Darmstadt | ||
// licenses this file to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
== Ollama | ||
|
||
==== | ||
CAUTION: Experimental feature. To use this functionality, you need to enable it first by adding `recommender.ollama.enabled=true` to the `settings.properties` file (see the <<admin-guide.adoc#sect_settings, Admin Guide>>). | ||
==== | ||
|
||
This recommender allows you to obtain annotation suggestions using large language models (LLMs) supported by link:https://ollama.ai[Ollama]. In order to use it, you first need to install Ollama and run it. | ||
|
||
.Installing and running Ollama on macOS using homebrew | ||
[source,sh] | ||
---- | ||
$ brew install ollama | ||
$ ollama pull mistral | ||
$ ollama serve mistral | ||
---- | ||
|
||
By default, Ollama runs on `http://localhost:11434/` and {product-name} uses this as the default endpoint for communicating with it. If you run Ollama on a different host (e.g. one that has a more powerful GPU) or port, you can adjust this URL in the recommender settings. | ||
|
||
If {product-name} can successfully connect to Ollama, the **model** combo-box will offer all models that are available on the respective endpoint. If you want to use a model that is not listed here, you first need to `ollama pull` it. | ||
|
||
Now you can configure how to generate the prompts that are sent to Ollama and how to interpret its response using the following settings: | ||
|
||
* **Prompting mode:** here you can choose to generate one prompt **per sentence**, **per annotation** or **per document**. | ||
* **Response format:** here you can choose how to read the response from Ollama. The choice is between **default** (i.e. text) and a **JSON** format. | ||
* **Extraction mode:** here you can choose how to interpret the response from Ollama. The availability of different extraction modes depends on the type of layer for which the recommender is configured. Choose **response as label** e.g. for classification or summarization tasks. It puts the response from the LLM directly into the feature that you configured the recommender to operate on. Choose **Mentions from JSON** (span layer) for information extraction tasks where you ask the LLM e.g. to identify and categorize certain types of entities in the text. | ||
* **Prompt:** Here you can finally define the prompt that is sent to Ollama. The prompt should usually consist of an instruction and a piece of text to which the instruction is to be applied. Depending on the prompting mode, there are different variables that can be used in the prompt. The most important variable is `text` and it corresponds to the sentence text, annotated words or document text, depending on the prompting mode. | ||
|
||
The recommender comes with several example configurations that you can choose from a drop-down field. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
.../java/de/tudarmstadt/ukp/inception/recommendation/imls/ollama/jinjava/CasWrapperTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* Licensed to the Technische Universität Darmstadt under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The Technische Universität Darmstadt | ||
* licenses this file to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava; | ||
|
||
import static de.tudarmstadt.ukp.inception.support.uima.AnnotationBuilder.buildAnnotation; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
import java.util.Map; | ||
|
||
import org.apache.uima.fit.factory.CasFactory; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import com.hubspot.jinjava.Jinjava; | ||
import com.hubspot.jinjava.JinjavaConfig; | ||
|
||
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; | ||
|
||
class CasWrapperTest | ||
{ | ||
private Jinjava jinjava; | ||
|
||
@BeforeEach | ||
void setup() | ||
{ | ||
var config = new JinjavaConfig(); | ||
jinjava = new Jinjava(config); | ||
} | ||
|
||
@Test | ||
void thatSelectCanAccessAnnotationsFromCas() throws Exception | ||
{ | ||
var script = """ | ||
{% for x in cas.select('NamedEntity') %} | ||
{{ x }}{% endfor %}"""; | ||
|
||
var bindings = Map.of("test", "test"); | ||
|
||
var cas = CasFactory.createCas(); | ||
cas.setDocumentText(""" | ||
My name is John McCain. | ||
His name is Mickey."""); | ||
buildAnnotation(cas, NamedEntity.class).on("John McCain").buildAndAddToIndexes(); | ||
buildAnnotation(cas, NamedEntity.class).on("Mickey").buildAndAddToIndexes(); | ||
|
||
jinjava.getGlobalContext().put("cas", new CasWrapper(cas)); | ||
|
||
var result = jinjava.render(script, bindings); | ||
|
||
assertThat(result).contains("John McCain\nMickey"); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters