Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Semantic Viewer - show document when no words at the input #933

Merged
merged 1 commit into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions orangecontrib/text/widgets/owsemanticviewer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import re
from types import SimpleNamespace
from typing import Optional, Any, List, Tuple
from typing import Optional, Any, List, Tuple, Union

import numpy as np

from AnyQt.QtCore import Qt, QUrl, QItemSelection, QItemSelectionModel, \
QModelIndex
from AnyQt.QtWidgets import QTableView, QSplitter, QApplication

from Orange.data import Table, Domain, StringVariable
from Orange.data import Table
from Orange.widgets import gui
from Orange.widgets.settings import Setting
from Orange.widgets.utils.annotated_data import create_annotated_table
Expand Down Expand Up @@ -303,6 +303,8 @@ def set_words(self, words: Optional[Table]):
def handleNewSignals(self):
# Presumably the widget hook invoked after the input signals have been
# set (name matches the Orange widget API) -- confirm against the base class.
# Reset state left over from the previous inputs (see `_clear`, not shown here).
self._clear()
# Kick off score computation for the current corpus and words.
self.update_scores()
# Populate the document list as soon as a corpus is available, so the
# documents are shown even when there are no words on the input.
if self.corpus is not None:
self._list_documents()

def update_scores(self):
self.start(run, self.corpus, self.words)
Expand All @@ -321,20 +323,31 @@ def on_done(self, results: Results):
if not self._results or not self.corpus or not self.words:
self.commit()
return
self._list_documents()

def _list_documents(self):
model = self._list_view.model()
model.setHorizontalHeaderLabels(["Match", "Score", "Document"])

def get_avg_score(result: List) -> float:
return "NA" if result is None else np.mean([r[1] for r in result])
def get_avg_score(i: int) -> Union[float, str]:
if self._results is not None:
result = self._results[i]
return "NA" if result is None else np.mean([r[1] for r in result])
else:
return ""

def get_n_matches(ngram):
return sum(ngram.count(word) for word in self.words)
if self.words is not None:
return sum(ngram.count(word) for word in self.words)
else:
return ""

data = [[get_n_matches(ngram), get_avg_score(res), title]
for res, title, ngram in zip(self._results,
self.corpus.titles.tolist(),
self.corpus.ngrams)]
data = [
[get_n_matches(ngram), get_avg_score(i), title]
for i, (title, ngram) in enumerate(
zip(self.corpus.titles.tolist(), self.corpus.ngrams)
)
]
model.wrap(data)
for i in range(len(data)):
model.setData(model.index(i, 0), i, role=IndexRole)
Expand Down Expand Up @@ -370,16 +383,18 @@ def _set_selected_rows(self, selected_rows: List[int]):
)

def _show_documents(self):
if self.corpus is None or self._results is None:
if self.corpus is None:
return

documents = self.corpus.documents
parser = DisplayDocument(self.display_index)
htmls = []
for doc_index in self.selection:
text = documents[doc_index]
matches = [ind for ind, score in self._results[doc_index] or []
if score >= self.threshold]
matches = []
if self._results:
matches = [ind for ind, score in self._results[doc_index] or []
if score >= self.threshold]
text = parser(text, matches)
text = text.replace("\n", "<br/>")
html = f"<p>{text}</p>"
Expand Down
31 changes: 29 additions & 2 deletions orangecontrib/text/widgets/tests/test_owsemanticviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,25 @@ def test_table(self):
for j in range(model.columnCount()):
self.assertEqual(model.data(model.index(i, j)), table[i][j])

def test_table_no_words(self):
"""When no words on the input still show documents but no scores"""
# Only the corpus is sent; the words input is deliberately left unset.
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.wait_until_finished()

model = self.widget._list_view.model()
# Expected rows: the first two columns are empty strings and the last
# column holds the document title.
table = [["", "", "Document 1"],
["", "", "Document 2"],
["", "", "Document 3"],
["", "", "Document 4"],
["", "", "Document 5"],
["", "", "Document 6"],
["", "", "Document 7"],
["", "", "Document 8"],
["", "", "Document 9"]]
# Compare every cell of the list-view model with the expected table.
for i in range(len(self.corpus)):
for j in range(model.columnCount()):
self.assertEqual(model.data(model.index(i, j)), table[i][j])

def test_webview(self):
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.send_signal(self.widget.Inputs.words, self.words)
Expand Down Expand Up @@ -398,11 +417,19 @@ def test_clear(self):
self.send_signal(self.widget.Inputs.words, None)
self.wait_until_finished()

self.assertEqual(self.widget.selection, [])
self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertEqual(self.widget.selection, [0])
self.assertIsNotNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.other_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus))

self.send_signal(self.widget.Inputs.corpus, None)
self.wait_until_finished()

self.assertEqual(self.widget.selection, [])
self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertIsNone(self.get_output(self.widget.Outputs.other_docs))
self.assertIsNone(self.get_output(self.widget.Outputs.corpus))

def test_sorted_table_selection(self):
self.widget.controls.threshold.setValue(1)

Expand Down