diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 2c2f702..5b00dcc 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -1,4 +1,4 @@
-name: Docker
+name: Docker build & push
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
diff --git a/src/templates/form.html b/src/templates/form.html
index 9dac9e0..620cae8 100644
--- a/src/templates/form.html
+++ b/src/templates/form.html
@@ -1,14 +1,54 @@
-
+
- Simple Form
+
+
+ YoYo MaskR
- upload text to mask
-
+
+ Response:
+
+
+
-
\ No newline at end of file
+
diff --git a/src/utils/llm.py b/src/utils/llm.py
index 45c7ed0..0e01795 100644
--- a/src/utils/llm.py
+++ b/src/utils/llm.py
@@ -8,7 +8,7 @@
Task instructions: Analyze the text provided after 'Text to anonymize' carefully for all names of persons and places.
For each name that you find evaluate whether it is a new name or just a repetition or variation of a name you have already found before.
Names of persons are labeled as #person_1#, #person_2#, etc. Names of places are labeled as #place_1#, #place_2#, etc.
-Only return a json dictionary without any comments or markdown formatting around it.
+Only return a json dictionary without any comments or markdown formatting around it. Do not return keys without values.
Example input: 'Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway.'.
Example output: {{"#person_1#": ["Tony Stark", "Tony"], "#person_2#": ["Peter Parker", "Peter"], "#place_1#": ["New York"], "#place_2#": ["Broadway"]}}
Text to anonymize: {text}
@@ -31,4 +31,6 @@ def llm_find_entities(text, temperature=0, template=TEMPLATE, raw=False):
result = chain.invoke({"text": text})
if raw:
return result
- return {k: sorted(v, key=len, reverse=True) for k, v in json.loads(result).items()}
+
+ ret = {k: v for k, v in json.loads(result).items()}
+ return {k: set(v) for k, v in ret.items() if v}
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..23f173e
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,36 @@
+import pytest
+from src.utils.llm import llm_find_entities
+
+def test_llm_find_entities_basic():
+    """The prompt's own example text maps each label to the set of its name variants."""
+    text = "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway."
+    expected_output = {'#person_1#': {'Tony Stark', 'Tony'}, '#person_2#': {'Peter Parker', 'Peter'}, '#place_1#': {'New York'}, '#place_2#': {'Broadway'}}
+    result = llm_find_entities(text)
+    assert result == expected_output
+
+def test_llm_find_entities_no_entities():
+    """Text without any person or place names is expected to yield an empty dict."""
+    plain_text = "This is a text without any names of persons or places."
+    found = llm_find_entities(plain_text)
+    assert found == {}
+
+def test_llm_find_entities_repeated_names():
+    """Repeated mentions of the same name are expected to collapse into one entry per label."""
+    text = "Alice and Bob went to Wonderland. Alice met Bob at the Wonderland park."
+    expected_output = {
+        "#person_1#": {"Alice"}, "#person_2#": {"Bob"},
+        "#place_1#": {"Wonderland"}, "#place_2#": {"Wonderland park"},
+    }
+    # llm_find_entities returns sets as values, so the expectation uses sets, not lists.
+    assert llm_find_entities(text) == expected_output
+
+def test_llm_find_entities_raw_output():
+    """With raw=True the chain output is returned as-is, without JSON parsing."""
+    text = "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway."
+    result = llm_find_entities(text, raw=True)
+    # pytest's assertion introspection shows `result` on failure, so no debug prints are needed.
+    assert isinstance(result, str)
+    assert "Tony Stark" in result
+    assert "Peter Parker" in result
+    assert "New York" in result
+    assert "Broadway" in result
\ No newline at end of file