From 0be467c148e8a66ee79dac36aa37a030fa3d37c3 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Mon, 30 Jan 2023 10:30:13 +0100 Subject: [PATCH] Semantic Viewer - show document when no words at the input --- .../text/widgets/owsemanticviewer.py | 39 +++++++++++++------ .../widgets/tests/test_owsemanticviewer.py | 31 ++++++++++++++- 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/orangecontrib/text/widgets/owsemanticviewer.py b/orangecontrib/text/widgets/owsemanticviewer.py index 274076e4f..52cd2dc05 100644 --- a/orangecontrib/text/widgets/owsemanticviewer.py +++ b/orangecontrib/text/widgets/owsemanticviewer.py @@ -1,6 +1,6 @@ import re from types import SimpleNamespace -from typing import Optional, Any, List, Tuple +from typing import Optional, Any, List, Tuple, Union import numpy as np @@ -8,7 +8,7 @@ QModelIndex from AnyQt.QtWidgets import QTableView, QSplitter, QApplication -from Orange.data import Table, Domain, StringVariable +from Orange.data import Table from Orange.widgets import gui from Orange.widgets.settings import Setting from Orange.widgets.utils.annotated_data import create_annotated_table @@ -303,6 +303,8 @@ def set_words(self, words: Optional[Table]): def handleNewSignals(self): self._clear() self.update_scores() + if self.corpus is not None: + self._list_documents() def update_scores(self): self.start(run, self.corpus, self.words) @@ -321,20 +323,31 @@ def on_done(self, results: Results): if not self._results or not self.corpus or not self.words: self.commit() return + self._list_documents() + def _list_documents(self): model = self._list_view.model() model.setHorizontalHeaderLabels(["Match", "Score", "Document"]) - def get_avg_score(result: List) -> float: - return "NA" if result is None else np.mean([r[1] for r in result]) + def get_avg_score(i: int) -> Union[float, str]: + if self._results is not None: + result = self._results[i] + return "NA" if result is None else np.mean([r[1] for r in result]) + else: + return "" def get_n_matches(ngram): - return sum(ngram.count(word) for word in self.words) + if self.words is not None: + return sum(ngram.count(word) for word in self.words) + else: + return "" - data = [[get_n_matches(ngram), get_avg_score(res), title] - for res, title, ngram in zip(self._results, - self.corpus.titles.tolist(), - self.corpus.ngrams)] + data = [ + [get_n_matches(ngram), get_avg_score(i), title] + for i, (title, ngram) in enumerate( + zip(self.corpus.titles.tolist(), self.corpus.ngrams) + ) + ] model.wrap(data) for i in range(len(data)): model.setData(model.index(i, 0), i, role=IndexRole) @@ -370,7 +383,7 @@ def _set_selected_rows(self, selected_rows: List[int]): ) def _show_documents(self): - if self.corpus is None or self._results is None: + if self.corpus is None: return documents = self.corpus.documents @@ -378,8 +391,10 @@ def _show_documents(self): htmls = [] for doc_index in self.selection: text = documents[doc_index] - matches = [ind for ind, score in self._results[doc_index] or [] - if score >= self.threshold] + matches = [] + if self._results: + matches = [ind for ind, score in self._results[doc_index] or [] + if score >= self.threshold] text = parser(text, matches) text = text.replace("\n", "
") html = f"

{text}

" diff --git a/orangecontrib/text/widgets/tests/test_owsemanticviewer.py b/orangecontrib/text/widgets/tests/test_owsemanticviewer.py index a6d7b1c29..f43c2bcac 100644 --- a/orangecontrib/text/widgets/tests/test_owsemanticviewer.py +++ b/orangecontrib/text/widgets/tests/test_owsemanticviewer.py @@ -339,6 +339,25 @@ def test_table(self): for j in range(model.columnCount()): self.assertEqual(model.data(model.index(i, j)), table[i][j]) + def test_table_no_words(self): + """When no words on the input still show documents but no scores""" + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.wait_until_finished() + + model = self.widget._list_view.model() + table = [["", "", "Document 1"], + ["", "", "Document 2"], + ["", "", "Document 3"], + ["", "", "Document 4"], + ["", "", "Document 5"], + ["", "", "Document 6"], + ["", "", "Document 7"], + ["", "", "Document 8"], + ["", "", "Document 9"]] + for i in range(len(self.corpus)): + for j in range(model.columnCount()): + self.assertEqual(model.data(model.index(i, j)), table[i][j]) + def test_webview(self): self.send_signal(self.widget.Inputs.corpus, self.corpus) self.send_signal(self.widget.Inputs.words, self.words) @@ -398,11 +417,19 @@ def test_clear(self): self.send_signal(self.widget.Inputs.words, None) self.wait_until_finished() - self.assertEqual(self.widget.selection, []) - self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs)) + self.assertEqual(self.widget.selection, [0]) + self.assertIsNotNone(self.get_output(self.widget.Outputs.matching_docs)) self.assertIsNotNone(self.get_output(self.widget.Outputs.other_docs)) self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus)) + self.send_signal(self.widget.Inputs.corpus, None) + self.wait_until_finished() + + self.assertEqual(self.widget.selection, []) + self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs)) + self.assertIsNone(self.get_output(self.widget.Outputs.other_docs)) + self.assertIsNone(self.get_output(self.widget.Outputs.corpus)) + def test_sorted_table_selection(self): self.widget.controls.threshold.setValue(1)