From 0be467c148e8a66ee79dac36aa37a030fa3d37c3 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Mon, 30 Jan 2023 10:30:13 +0100
Subject: [PATCH] Semantic Viewer - show documents when there are no words on the input
---
.../text/widgets/owsemanticviewer.py | 39 +++++++++++++------
.../widgets/tests/test_owsemanticviewer.py | 31 ++++++++++++++-
2 files changed, 56 insertions(+), 14 deletions(-)
diff --git a/orangecontrib/text/widgets/owsemanticviewer.py b/orangecontrib/text/widgets/owsemanticviewer.py
index 274076e4f..52cd2dc05 100644
--- a/orangecontrib/text/widgets/owsemanticviewer.py
+++ b/orangecontrib/text/widgets/owsemanticviewer.py
@@ -1,6 +1,6 @@
import re
from types import SimpleNamespace
-from typing import Optional, Any, List, Tuple
+from typing import Optional, Any, List, Tuple, Union
import numpy as np
@@ -8,7 +8,7 @@
QModelIndex
from AnyQt.QtWidgets import QTableView, QSplitter, QApplication
-from Orange.data import Table, Domain, StringVariable
+from Orange.data import Table
from Orange.widgets import gui
from Orange.widgets.settings import Setting
from Orange.widgets.utils.annotated_data import create_annotated_table
@@ -303,6 +303,8 @@ def set_words(self, words: Optional[Table]):
def handleNewSignals(self):
self._clear()
self.update_scores()
+ if self.corpus is not None:
+ self._list_documents()
def update_scores(self):
self.start(run, self.corpus, self.words)
@@ -321,20 +323,31 @@ def on_done(self, results: Results):
if not self._results or not self.corpus or not self.words:
self.commit()
return
+ self._list_documents()
+ def _list_documents(self):
model = self._list_view.model()
model.setHorizontalHeaderLabels(["Match", "Score", "Document"])
- def get_avg_score(result: List) -> float:
- return "NA" if result is None else np.mean([r[1] for r in result])
+ def get_avg_score(i: int) -> Union[float, str]:
+ if self._results is not None:
+ result = self._results[i]
+ return "NA" if result is None else np.mean([r[1] for r in result])
+ else:
+ return ""
def get_n_matches(ngram):
- return sum(ngram.count(word) for word in self.words)
+ if self.words is not None:
+ return sum(ngram.count(word) for word in self.words)
+ else:
+ return ""
- data = [[get_n_matches(ngram), get_avg_score(res), title]
- for res, title, ngram in zip(self._results,
- self.corpus.titles.tolist(),
- self.corpus.ngrams)]
+ data = [
+ [get_n_matches(ngram), get_avg_score(i), title]
+ for i, (title, ngram) in enumerate(
+ zip(self.corpus.titles.tolist(), self.corpus.ngrams)
+ )
+ ]
model.wrap(data)
for i in range(len(data)):
model.setData(model.index(i, 0), i, role=IndexRole)
@@ -370,7 +383,7 @@ def _set_selected_rows(self, selected_rows: List[int]):
)
def _show_documents(self):
- if self.corpus is None or self._results is None:
+ if self.corpus is None:
return
documents = self.corpus.documents
@@ -378,8 +391,10 @@ def _show_documents(self):
htmls = []
for doc_index in self.selection:
text = documents[doc_index]
- matches = [ind for ind, score in self._results[doc_index] or []
- if score >= self.threshold]
+ matches = []
+ if self._results:
+ matches = [ind for ind, score in self._results[doc_index] or []
+ if score >= self.threshold]
text = parser(text, matches)
text = text.replace("\n", "<br/>")
html = f"<p>{text}</p>"
diff --git a/orangecontrib/text/widgets/tests/test_owsemanticviewer.py b/orangecontrib/text/widgets/tests/test_owsemanticviewer.py
index a6d7b1c29..f43c2bcac 100644
--- a/orangecontrib/text/widgets/tests/test_owsemanticviewer.py
+++ b/orangecontrib/text/widgets/tests/test_owsemanticviewer.py
@@ -339,6 +339,25 @@ def test_table(self):
for j in range(model.columnCount()):
self.assertEqual(model.data(model.index(i, j)), table[i][j])
+ def test_table_no_words(self):
+ """When no words on the input still show documents but no scores"""
+ self.send_signal(self.widget.Inputs.corpus, self.corpus)
+ self.wait_until_finished()
+
+ model = self.widget._list_view.model()
+ table = [["", "", "Document 1"],
+ ["", "", "Document 2"],
+ ["", "", "Document 3"],
+ ["", "", "Document 4"],
+ ["", "", "Document 5"],
+ ["", "", "Document 6"],
+ ["", "", "Document 7"],
+ ["", "", "Document 8"],
+ ["", "", "Document 9"]]
+ for i in range(len(self.corpus)):
+ for j in range(model.columnCount()):
+ self.assertEqual(model.data(model.index(i, j)), table[i][j])
+
def test_webview(self):
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.send_signal(self.widget.Inputs.words, self.words)
@@ -398,11 +417,19 @@ def test_clear(self):
self.send_signal(self.widget.Inputs.words, None)
self.wait_until_finished()
- self.assertEqual(self.widget.selection, [])
- self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
+ self.assertEqual(self.widget.selection, [0])
+ self.assertIsNotNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.other_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus))
+ self.send_signal(self.widget.Inputs.corpus, None)
+ self.wait_until_finished()
+
+ self.assertEqual(self.widget.selection, [])
+ self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
+ self.assertIsNone(self.get_output(self.widget.Outputs.other_docs))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
+
def test_sorted_table_selection(self):
self.widget.controls.threshold.setValue(1)