|
1 | 1 | from difflib import SequenceMatcher |
2 | 2 | import re |
3 | | -from inflection import singularize as _singularize_en |
4 | 3 | from enum import IntEnum, auto |
5 | 4 | from ovos_utils.log import LOG |
6 | 5 |
|
@@ -101,8 +100,6 @@ def match_all(query, choices, match_func=None, strategy=MatchStrategy.SIMPLE_RAT |
101 | 100 |
|
102 | 101 |
|
103 | 102 | def singularize(word, lang="en"): |
104 | | - if lang.startswith("en"): |
105 | | - return _singularize_en(word) |
106 | 103 | return word.rstrip("s") |
107 | 104 |
|
108 | 105 |
|
@@ -252,61 +249,3 @@ def extract_paragraphs(query, text, lang="en"): |
252 | 249 | return search_in_text(query, text, lang, |
253 | 250 | all_matches=True, paragraphs=True) |
254 | 251 |
|
255 | | - |
256 | | -if __name__ == "__main__": |
257 | | - |
258 | | - |
259 | | - print("## Searching: intent") |
260 | | - for sent, score in extract_sentences("intent", wiki_dump): |
261 | | - if score > 0.3: |
262 | | - print(sent) |
263 | | - |
264 | | - print("## Searching: precise") |
265 | | - for sent, score in extract_paragraphs("precise", wiki_dump): |
266 | | - if score > 0.3: |
267 | | - print(sent) |
268 | | - exit(0) |
269 | | - s = "hello. He said" |
270 | | - for s in split_sentences(s): |
271 | | - print(s) |
272 | | - s = "hello . He said" |
273 | | - for s in split_sentences(s): |
274 | | - print(s) |
275 | | - |
276 | | - # no splitting |
277 | | - s = "hello.com" |
278 | | - for s in split_sentences(s): |
279 | | - print(s) |
280 | | - s = "A.E:I.O.U" |
281 | | - for s in split_sentences(s): |
282 | | - print(s) |
283 | | - |
284 | | - # ambiguous, but will split |
285 | | - s = "hello.He said" |
286 | | - for s in split_sentences(s): |
287 | | - print(s) |
288 | | - |
289 | | - # ambiguous, no split |
290 | | - s = "hello. he said" # could be "Jones Jr. thinks ..." |
291 | | - for s in split_sentences(s): |
292 | | - print(s) |
293 | | - s = "hello.he said" # could be "www.hello.com" |
294 | | - for s in split_sentences(s): |
295 | | - print(s) |
296 | | - s = "hello . he said" # TODO maybe split this one? |
297 | | - for s in split_sentences(s): |
298 | | - print(s) |
299 | | - |
300 | | - # test all |
301 | | - s = "Mr. Smith bought cheapsite.com for 1.5 million dollars, i.e. he paid a lot for it. Did he mind? Adam Jones Jr. thinks he didn't. In any case, this isn't true... Well, with a probability of .9 it isn't.I know right\nOK" |
302 | | - print(summarize(s)) |
303 | | - for s in split_sentences(s): |
304 | | - print(s) |
305 | | - |
306 | | - s = "this is {remove me} the first sentence " |
307 | | - print(summarize(s)) |
308 | | - s = " this is (remove me) second. and the 3rd" |
309 | | - print(summarize(s)) |
310 | | - s = "this is [remove me] number 4! number5? number6. number 7 \n " \ |
311 | | - "number N" |
312 | | - print(summarize(s)) |
0 commit comments