From 2d1a1fa6c807a8a8cb048307c7f71039cf414e33 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 13 Jan 2020 14:48:49 +0300 Subject: [PATCH 01/38] :construction: Add long time job Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/ocr_read.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index fcd402b6..d1ccbbbd 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -17,6 +17,7 @@ from spellchecker import SpellChecker + def get_words_from_text(message): message = re.sub(r'\W+', " ", message) word_list = list(filter(None, message.split())) @@ -36,14 +37,13 @@ def get_spellchecked_text(message, language): class OCRRead(Document): def read_image(self): - text = read_document(self.file_to_read, self.language or 'eng', self.spell_checker) - self.read_result = text - self.save() - return text + frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_document", queue="long", + timeout=1500, **{ + 'path': self.file_to_read, 'lang': self.language, 'spellcheck': self.spell_checker, 'obj': self}) @frappe.whitelist() -def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar"): +def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar", obj=None): """Call Tesseract OCR to extract the text from a document.""" from PIL import Image import requests @@ -124,7 +124,8 @@ def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar"): frappe.publish_realtime( event, {"progress": [100, 100]}, user=frappe.session.user) - + obj.read_result = text + obj.save() return text From 4a47e49bf7684dafbc03a62de0fe18b2dd487012 Mon Sep 17 00:00:00 2001 From: "mathieu.brunot" Date: Mon, 13 Jan 2020 16:58:59 +0100 Subject: [PATCH 02/38] :construction: Add TDD functions Signed-off-by: mathieu.brunot --- .../erpnext_ocr/doctype/ocr_read/ocr_read.py | 28 ++++++-- .../doctype/ocr_read/test_ocr_read.py | 65 +++++++++++++++++++ 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index d1ccbbbd..fac8f61e 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -37,13 +37,30 @@ def get_spellchecked_text(message, language): class OCRRead(Document): def read_image(self): - frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_document", queue="long", - timeout=1500, **{ - 'path': self.file_to_read, 'lang': self.language, 'spellcheck': self.spell_checker, 'obj': self}) + return read_ocr(self) + + def read_image_bg(self): + return frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_ocr", queue="long", + timeout=1500, **{'obj': self}) + + +@frappe.whitelist() +def read_ocr(obj): + """Call Tesseract OCR to extract the text from a OCR Read object.""" + + if obj is None: + frappe.msgprint(frappe._("An expected error occurred."), + raise_exception=True) + + text = read_document(obj.file_to_read, obj.language or 'eng', obj.spell_checker) + obj.read_result = text + obj.save() + + return text @frappe.whitelist() -def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar", obj=None): +def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar"): """Call Tesseract OCR to extract the text from a document.""" from PIL import Image import requests @@ -124,8 +141,7 @@ def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar", frappe.publish_realtime( event, {"progress": [100, 100]}, user=frappe.session.user) - obj.read_result = text - obj.save() + return text diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index ed29ccf8..5706ea91 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -90,6 +90,71 @@ def setUp(self): def tearDown(self): delete_ocr_reads() + def test_ocr_read_image_bg(self): + frappe.set_user("Administrator") + doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng" + }) + + self.assertEqual(None, doc.read_result) + + worker = doc.read_image_bg() + # [TODO] Test worker completion before moving on in the tests + + self.assertEqual(None, doc.read_result) + + new_doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng" + }) + + self.assertNotEqual(new_doc.read_result, doc.read_result) + + self.assertIn("The quick brown fox", new_doc.read_result) + self.assertIn("jumped over the 5", new_doc.read_result) + self.assertIn("lazy dogs!", new_doc.read_result) + self.assertNotIn("And an elephant!", new_doc.read_result) + + + def test_ocr_read_image_bg_pdf(self): + frappe.set_user("Administrator") + doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), + "language": "eng" + }) + + self.assertEqual(None, doc.read_result) + + worker = doc.read_image_bg() + # [TODO] Test worker completion before moving on in the tests + + self.assertEqual(None, doc.read_result) + + new_doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng" + }) + + # FIXME values are not equal on Alpine ??! + #self.maxDiff = None + #self.assertEqual(new_doc.read_result, doc.read_result) + + self.assertIn("Python Basics", new_doc.read_result) + self.assertNotIn("Java", new_doc.read_result) + def test_ocr_read_image(self): frappe.set_user("Administrator") From 3deb8dd75cbc8a700f062fe49cf3cd5c6b409d64 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 16 Jan 2020 15:44:32 +0300 Subject: [PATCH 03/38] :construction: Add test for bg job Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 5706ea91..c127e9ef 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,6 +5,8 @@ from __future__ import unicode_literals +import time + import frappe import unittest import os @@ -104,6 +106,8 @@ def test_ocr_read_image_bg(self): worker = doc.read_image_bg() # [TODO] Test worker completion before moving on in the tests + time.sleep(5) + self.assertIsNotNone(worker.ended_at) self.assertEqual(None, doc.read_result) From 41a488fc63653b21b7dc55c0db39bb4629905684 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 16 Jan 2020 15:45:24 +0300 Subject: [PATCH 04/38] :construction: Add Todo for future Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index c127e9ef..a6dfecd2 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -106,7 +106,7 @@ def test_ocr_read_image_bg(self): worker = doc.read_image_bg() # [TODO] Test worker completion before moving on in the tests - time.sleep(5) + time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern self.assertIsNotNone(worker.ended_at) self.assertEqual(None, doc.read_result) From 035feda6cb64212827a817448f96ea6ef0a9af02 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 16 Jan 2020 15:53:45 +0300 Subject: [PATCH 05/38] :construction: fix unused variable Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py | 3 ++- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index fac8f61e..da26d3d0 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -70,7 +70,8 @@ def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar"): return None if not lang_available(lang): - frappe.msgprint(frappe._("The selected language is not available. Please contact your administrator."), + frappe.msgprint(frappe._ + ("The selected language is not available. Please contact your administrator."), raise_exception=True) frappe.publish_realtime(event, {"progress": "0"}, user=frappe.session.user) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index a6dfecd2..794d275b 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -104,10 +104,10 @@ def test_ocr_read_image_bg(self): self.assertEqual(None, doc.read_result) - worker = doc.read_image_bg() + _worker = doc.read_image_bg() # [TODO] Test worker completion before moving on in the tests time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertIsNotNone(worker.ended_at) + self.assertIsNotNone(_worker.ended_at) self.assertEqual(None, doc.read_result) From 8ef702d7d7e3a0770c1abae729488e20db05b8a2 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 16 Jan 2020 15:57:34 +0300 Subject: [PATCH 06/38] :construction: Add another test Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 794d275b..6f13f133 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -104,10 +104,10 @@ def test_ocr_read_image_bg(self): self.assertEqual(None, doc.read_result) - _worker = doc.read_image_bg() + worker = doc.read_image_bg() # [TODO] Test worker completion before moving on in the tests time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertIsNotNone(_worker.ended_at) + self.assertIsNotNone(worker.ended_at) self.assertEqual(None, doc.read_result) @@ -141,6 +141,8 @@ def test_ocr_read_image_bg_pdf(self): worker = doc.read_image_bg() # [TODO] Test worker completion before moving on in the tests + time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern + self.assertIsNotNone(worker.ended_at) self.assertEqual(None, doc.read_result) From e3b089593ca714d85c003a5cac7b66359c16d22d Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 21 Jan 2020 13:36:10 +0300 Subject: [PATCH 07/38] :construction: Fix test Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/ocr_read.js | 2 +- .../erpnext_ocr/doctype/ocr_read/ocr_read.py | 4 ++-- .../erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 14 +++++--------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js index b6724296..f53c3019 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js @@ -17,7 +17,7 @@ frappe.ui.form.on('OCR Read', { callback: function (r) { cur_dialog.hide(); frappe.msgprint(r.message.message); - cur_frm.set_value("read_result", r.message); + cur_frm.refresh(); } }); } diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index da26d3d0..807a92ca 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -39,9 +39,9 @@ class OCRRead(Document): def read_image(self): return read_ocr(self) - def read_image_bg(self): + def read_image_bg(self, is_async=True): return frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_ocr", queue="long", - timeout=1500, **{'obj': self}) + timeout=1500, is_async=is_async, **{'obj': self}) @frappe.whitelist() diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 6f13f133..572812e1 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals -import time import frappe import unittest @@ -104,12 +103,10 @@ def test_ocr_read_image_bg(self): self.assertEqual(None, doc.read_result) - worker = doc.read_image_bg() + worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests - time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertIsNotNone(worker.ended_at) + self.assertEqual(worker._status, "finished") - self.assertEqual(None, doc.read_result) new_doc = frappe.get_doc({ "doctype": "OCR Read", @@ -139,11 +136,10 @@ def test_ocr_read_image_bg_pdf(self): self.assertEqual(None, doc.read_result) - worker = doc.read_image_bg() + worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests - time.sleep(5) # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertIsNotNone(worker.ended_at) - + # TODO: Will be better if we can understand how realize producer-consumer pattern + self.assertEqual(worker._status, "finished") self.assertEqual(None, doc.read_result) new_doc = frappe.get_doc({ From 50ce944fedb001f895dea43fcf3880f92da87047 Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 21 Jan 2020 17:04:44 +0300 Subject: [PATCH 08/38] :white_check_mark: Fix tests Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 572812e1..10dbb637 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -105,7 +105,7 @@ def test_ocr_read_image_bg(self): worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests - self.assertEqual(worker._status, "finished") + self.assertTrue(worker._status in ["queued", "finished"]) new_doc = frappe.get_doc({ @@ -139,7 +139,7 @@ def test_ocr_read_image_bg_pdf(self): worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertEqual(worker._status, "finished") + self.assertTrue(worker._status in ["queued", "finished"]) self.assertEqual(None, doc.read_result) new_doc = frappe.get_doc({ From 9f62767bef51a38ecf167710b0ff8f08bd42f190 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 22 Jan 2020 12:02:56 +0300 Subject: [PATCH 09/38] :white_check_mark: Trying fix test Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 10dbb637..20d28f72 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals +import time import frappe import unittest @@ -116,6 +117,7 @@ def test_ocr_read_image_bg(self): "language": "eng" }) + time.sleep(5) self.assertNotEqual(new_doc.read_result, doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) @@ -153,7 +155,7 @@ def test_ocr_read_image_bg_pdf(self): # FIXME values are not equal on Alpine ??! #self.maxDiff = None #self.assertEqual(new_doc.read_result, doc.read_result) - + new_doc.read_image() self.assertIn("Python Basics", new_doc.read_result) self.assertNotIn("Java", new_doc.read_result) From bc764fe6c1e8f1b38f443be437ed796e2d69ee27 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 22 Jan 2020 13:05:42 +0300 Subject: [PATCH 10/38] :white_check_mark: Trying fix test v2 Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 20d28f72..bdd701c8 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -107,7 +107,8 @@ def test_ocr_read_image_bg(self): worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests self.assertTrue(worker._status in ["queued", "finished"]) - + while worker._status != 'finished': + time.sleep(1) new_doc = frappe.get_doc({ "doctype": "OCR Read", @@ -116,10 +117,7 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - - time.sleep(5) self.assertNotEqual(new_doc.read_result, doc.read_result) - self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) @@ -156,8 +154,8 @@ def test_ocr_read_image_bg_pdf(self): #self.maxDiff = None #self.assertEqual(new_doc.read_result, doc.read_result) new_doc.read_image() - self.assertIn("Python Basics", new_doc.read_result) - self.assertNotIn("Java", new_doc.read_result) + self.assertIn("Python Basics", doc.read_result) + self.assertNotIn("Java", doc.read_result) def test_ocr_read_image(self): From 34a9651b9d12f9d79d785f81c5a846e325e1debe Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 22 Jan 2020 13:39:23 +0300 Subject: [PATCH 11/38] :white_check_mark: Trying fix test v3 Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index bdd701c8..b24dcf0a 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -11,7 +11,8 @@ import unittest import os -from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc +from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc, read_ocr + # TODO Frappe default test records creation #def _make_test_records(verbose): @@ -107,9 +108,6 @@ def test_ocr_read_image_bg(self): worker = doc.read_image_bg(is_async=False) # [TODO] Test worker completion before moving on in the tests self.assertTrue(worker._status in ["queued", "finished"]) - while worker._status != 'finished': - time.sleep(1) - new_doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -117,6 +115,7 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) + read_ocr(doc) self.assertNotEqual(new_doc.read_result, doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) @@ -140,7 +139,6 @@ def test_ocr_read_image_bg_pdf(self): # [TODO] Test worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern self.assertTrue(worker._status in ["queued", "finished"]) - self.assertEqual(None, doc.read_result) new_doc = frappe.get_doc({ "doctype": "OCR Read", From 08a81514a24efdf47f1bf2f15a3b87d2ed402a0d Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 22 Jan 2020 13:42:41 +0300 Subject: [PATCH 12/38] :white_check_mark: Remove time library Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 1 - 1 file changed, 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index b24dcf0a..847c0e49 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals -import time import frappe import unittest From 5c3a9c9e4478aec846e6d9691f24329981218383 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 22 Jan 2020 13:59:14 +0300 Subject: [PATCH 13/38] :white_check_mark: Blank commit Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 1 - 1 file changed, 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 847c0e49..204c0911 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals - import frappe import unittest import os From 5343d661db5a30e0e7c8fd7af63c403bbc1aad02 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 23 Jan 2020 13:39:05 +0300 Subject: [PATCH 14/38] :lipstick: Update tests for validation Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index f7b13e26..84495858 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -113,8 +113,8 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - read_ocr(doc) self.assertNotEqual(new_doc.read_result, doc.read_result) + read_ocr(new_doc) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) @@ -149,7 +149,7 @@ def test_ocr_read_image_bg_pdf(self): # FIXME values are not equal on Alpine ??! #self.maxDiff = None #self.assertEqual(new_doc.read_result, doc.read_result) - new_doc.read_image() + read_ocr(new_doc) self.assertIn("Python Basics", doc.read_result) self.assertNotIn("Java", doc.read_result) From ef2cad12ccdc84e086f2617a42632bd039672666 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 23 Jan 2020 16:30:28 +0300 Subject: [PATCH 15/38] :white_check_mark: Add new tests Signed-off-by: Emil --- .../doctype/ocr_read/test_ocr_read.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 84495858..75e26195 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -113,8 +113,20 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - self.assertNotEqual(new_doc.read_result, doc.read_result) + new_doc_2 = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng" + }) read_ocr(new_doc) + if worker._status == "queued": + self.assertIsNone(new_doc.read_result) + self.assertIsNone(doc.read_result) + else: + self.assertEqual(new_doc.read_result, doc.read_result) + self.assertNotEqual(new_doc_2.read_result, new_doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) @@ -149,7 +161,7 @@ def test_ocr_read_image_bg_pdf(self): # FIXME values are not equal on Alpine ??! #self.maxDiff = None #self.assertEqual(new_doc.read_result, doc.read_result) - read_ocr(new_doc) + read_ocr(doc) self.assertIn("Python Basics", doc.read_result) self.assertNotIn("Java", doc.read_result) From 4918b9b5cb414baa3e4c636d1e2cc8b59d7008e0 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 23 Jan 2020 16:38:36 +0300 Subject: [PATCH 16/38] :white_check_mark: Update tests Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 75e26195..67836daa 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -154,16 +154,20 @@ def test_ocr_read_image_bg_pdf(self): "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), + "tests", "test_data", "sample2.pdf"), "language": "eng" }) # FIXME values are not equal on Alpine ??! #self.maxDiff = None #self.assertEqual(new_doc.read_result, doc.read_result) - read_ocr(doc) - self.assertIn("Python Basics", doc.read_result) - self.assertNotIn("Java", doc.read_result) + if worker._status == "finished": + self.assertIn("Python Basics", doc.read_result) + self.assertNotIn("Java", doc.read_result) + if worker._status == "queued": + read_ocr(new_doc) + self.assertIn("Python Basics", new_doc.read_result) + self.assertNotIn("Java", new_doc.read_result) def test_ocr_read_image(self): From 0f222e225a8615742243425cd12402461dcba6ad Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 23 Jan 2020 17:09:25 +0300 Subject: [PATCH 17/38] :white_check_mark: Last update test Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 1 - 1 file changed, 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 67836daa..263e6a0b 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -120,7 +120,6 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - read_ocr(new_doc) if worker._status == "queued": self.assertIsNone(new_doc.read_result) self.assertIsNone(doc.read_result) From eb72741836c908dbc8d53ce84aac0a0396e6f659 Mon Sep 17 00:00:00 2001 From: Emil Date: Thu, 23 Jan 2020 17:49:36 +0300 Subject: [PATCH 18/38] :lipstick: The last beautify of code Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 1 + 1 file changed, 1 insertion(+) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 263e6a0b..0b6b5e4a 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -126,6 +126,7 @@ def test_ocr_read_image_bg(self): else: self.assertEqual(new_doc.read_result, doc.read_result) self.assertNotEqual(new_doc_2.read_result, new_doc.read_result) + read_ocr(new_doc) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) From 50742ed0f5ade4cbc368a353b3d1968116007b00 Mon Sep 17 00:00:00 2001 From: Emil Date: Fri, 24 Jan 2020 17:48:21 +0300 Subject: [PATCH 19/38] :white_check_mark: Update tests Signed-off-by: Emil --- .../doctype/ocr_read/test_ocr_read.py | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 0b6b5e4a..b75874bd 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,6 +5,8 @@ from __future__ import unicode_literals +import time + import frappe import unittest import os @@ -113,20 +115,14 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - new_doc_2 = frappe.get_doc({ - "doctype": "OCR Read", - "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), - "language": "eng" - }) - if worker._status == "queued": - self.assertIsNone(new_doc.read_result) - self.assertIsNone(doc.read_result) - else: - self.assertEqual(new_doc.read_result, doc.read_result) - self.assertNotEqual(new_doc_2.read_result, new_doc.read_result) - read_ocr(new_doc) + new_worker = new_doc.read_image_bg(is_async=False) + while worker._status == "queued": + time.sleep(5) + + while new_worker._status == "queued": + time.sleep(5) + + self.assertEqual(new_doc.read_result, doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) @@ -149,7 +145,8 @@ def test_ocr_read_image_bg_pdf(self): # [TODO] Test worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern self.assertTrue(worker._status in ["queued", "finished"]) - + while worker._status == "queued": + time.sleep(5) new_doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -157,17 +154,13 @@ def test_ocr_read_image_bg_pdf(self): "tests", "test_data", "sample2.pdf"), "language": "eng" }) + worker2 = new_doc.read_image_bg(is_async=False) # FIXME values are not equal on Alpine ??! - #self.maxDiff = None - #self.assertEqual(new_doc.read_result, doc.read_result) - if worker._status == "finished": - self.assertIn("Python Basics", doc.read_result) - self.assertNotIn("Java", doc.read_result) - if worker._status == "queued": - read_ocr(new_doc) - self.assertIn("Python Basics", new_doc.read_result) - self.assertNotIn("Java", new_doc.read_result) + while worker2._status == "queued": + time.sleep(5) + self.assertIn("Python Basics", new_doc.read_result) + self.assertNotIn("Java", new_doc.read_result) def test_ocr_read_image(self): From fcaa6dfb0f8a7c5e0159c3e40e1014964548fb23 Mon Sep 17 00:00:00 2001 From: Emil Date: Fri, 24 Jan 2020 17:50:35 +0300 Subject: [PATCH 20/38] :white_check_mark: Remove unnecesary library Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index b75874bd..f8647637 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -11,7 +11,7 @@ import unittest import os -from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc, read_ocr +from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc # TODO Frappe default test records creation From e6128c6e4239be6346ec9ee179599eb8fe9edc73 Mon Sep 17 00:00:00 2001 From: Mathieu Brunot Date: Fri, 24 Jan 2020 18:27:07 +0300 Subject: [PATCH 21/38] :ok_hand: Fix unit tests process for bg jobs --- .../doctype/ocr_read/test_ocr_read.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index f8647637..ac2e820e 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -106,8 +106,13 @@ def test_ocr_read_image_bg(self): self.assertEqual(None, doc.read_result) worker = doc.read_image_bg(is_async=False) - # [TODO] Test worker completion before moving on in the tests - self.assertTrue(worker._status in ["queued", "finished"]) + + # Wait worker completion before moving on in the tests + while worker._status == "queued": + time.sleep(5) + + # Check worker completion and get "new" document after update by bg job + self.assertEqual(worker._status, "finished") new_doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -115,14 +120,8 @@ def test_ocr_read_image_bg(self): "tests", "test_data", "sample1.jpg"), "language": "eng" }) - new_worker = new_doc.read_image_bg(is_async=False) - while worker._status == "queued": - time.sleep(5) - - while new_worker._status == "queued": - time.sleep(5) - self.assertEqual(new_doc.read_result, doc.read_result) + self.assertNotEqual(new_doc.read_result, doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) @@ -142,11 +141,14 @@ def test_ocr_read_image_bg_pdf(self): self.assertEqual(None, doc.read_result) worker = doc.read_image_bg(is_async=False) - # [TODO] Test worker completion before moving on in the tests + + # Wait worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern - self.assertTrue(worker._status in ["queued", "finished"]) while worker._status == "queued": time.sleep(5) + + # Check worker completion and get "new" document after update by bg job + self.assertEqual(worker._status, "finished") new_doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -154,11 +156,8 @@ def test_ocr_read_image_bg_pdf(self): "tests", "test_data", "sample2.pdf"), "language": "eng" }) - worker2 = new_doc.read_image_bg(is_async=False) - # FIXME values are not equal on Alpine ??! - while worker2._status == "queued": - time.sleep(5) + self.assertNotEqual(new_doc.read_result, doc.read_result) self.assertIn("Python Basics", new_doc.read_result) self.assertNotIn("Java", new_doc.read_result) From d8bbc0d7e5c840b02297a5d193a75b8657ae24ad Mon Sep 17 00:00:00 2001 From: Emil Date: Fri, 24 Jan 2020 18:42:11 +0300 Subject: [PATCH 22/38] :white_check_mark: Update tests Signed-off-by: Emil --- .../doctype/ocr_read/test_ocr_read.py | 60 ++++++++----------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index ac2e820e..f8bbe449 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -15,7 +15,7 @@ # TODO Frappe default test records creation -#def _make_test_records(verbose): +# def _make_test_records(verbose): # from frappe.test_runner import make_test_objects # # docs = [ @@ -81,7 +81,7 @@ def delete_ocr_reads(): doc.delete() # Delete directly in DB to avoid validation errors - #frappe.db.sql("""delete from `tabOCR Read`""") + # frappe.db.sql("""delete from `tabOCR Read`""") frappe.flags.test_ocr_reads_created = False @@ -98,8 +98,8 @@ def test_ocr_read_image_bg(self): doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), "language": "eng" }) @@ -113,35 +113,32 @@ def test_ocr_read_image_bg(self): # Check worker completion and get "new" document after update by bg job self.assertEqual(worker._status, "finished") - new_doc = frappe.get_doc({ - "doctype": "OCR Read", - "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), - "language": "eng" - }) + new_doc = frappe.get_doc("OCR Read", + {"file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng"}) - self.assertNotEqual(new_doc.read_result, doc.read_result) + self.assertEqual(new_doc.read_result, doc.read_result) self.assertIn("The quick brown fox", new_doc.read_result) self.assertIn("jumped over the 5", new_doc.read_result) self.assertIn("lazy dogs!", new_doc.read_result) self.assertNotIn("And an elephant!", new_doc.read_result) - def test_ocr_read_image_bg_pdf(self): frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample2.pdf"), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), "language": "eng" }) self.assertEqual(None, doc.read_result) worker = doc.read_image_bg(is_async=False) - + # Wait worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern while worker._status == "queued": @@ -149,26 +146,23 @@ def test_ocr_read_image_bg_pdf(self): # Check worker completion and get "new" document after update by bg job self.assertEqual(worker._status, "finished") - new_doc = frappe.get_doc({ - "doctype": "OCR Read", + new_doc = frappe.get_doc("OCR Read", { "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample2.pdf"), - "language": "eng" - }) + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), + "language": "eng"}) - self.assertNotEqual(new_doc.read_result, doc.read_result) + self.assertEqual(new_doc.read_result, doc.read_result) self.assertIn("Python Basics", new_doc.read_result) self.assertNotIn("Java", new_doc.read_result) - def test_ocr_read_image(self): frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), "language": "eng" }) @@ -180,14 +174,13 @@ def test_ocr_read_image(self): self.assertIn("lazy dogs!", recognized_text) self.assertNotIn("And an elephant!", recognized_text) - def test_ocr_read_pdf(self): frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample2.pdf"), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), "language": "eng" }) @@ -199,13 +192,12 @@ def test_ocr_read_pdf(self): self.assertIn("Python Basics", recognized_text) self.assertNotIn("Java", recognized_text) - def test_force_attach_file_doc(self): doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "Picture_010.png"), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "Picture_010.png"), "language": "eng" }) @@ -213,7 +205,7 @@ def test_force_attach_file_doc(self): forced_doc = frappe.get_doc({ "doctype": "OCR Read", - #"name": doc.name, + # "name": doc.name, "file_to_read": "/private/files/test.tif", "language": "eng" }) @@ -225,7 +217,7 @@ def test_ocr_read_list(self): # frappe.set_user("test1@example.com") frappe.set_user("Administrator") res = frappe.get_list("OCR Read", filters=[ - ["OCR Read", "file_to_read", "like", "%sample%"]], fields=["name", "file_to_read"]) + ["OCR Read", "file_to_read", "like", "%sample%"]], fields=["name", "file_to_read"]) self.assertEqual(len(res), 2) files_to_read = [r.file_to_read for r in res] self.assertTrue(os.path.join(os.path.dirname(__file__), From 94fd70b850772ec99c2ca4a60f506dfa83235527 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 27 Jan 2020 11:13:25 +0300 Subject: [PATCH 23/38] :consturction: Add logs for debuging Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index f8bbe449..47a669ca 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -109,6 +109,7 @@ def test_ocr_read_image_bg(self): # Wait worker completion before moving on in the tests while worker._status == "queued": + print("queue1") time.sleep(5) # Check worker completion and get "new" document after update by bg job @@ -142,6 +143,7 @@ def test_ocr_read_image_bg_pdf(self): # Wait worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern while worker._status == "queued": + print("queue2") time.sleep(5) # Check worker completion and get "new" document after update by bg job From bad4f0a712db7fb5c6c78b99fcfcfe3605f36d24 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 27 Jan 2020 12:01:15 +0300 Subject: [PATCH 24/38] :whale: Add more time for tests Signed-off-by: Emil --- .travis.yml | 2 +- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index b8c2005b..9c0f3e4f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,7 +56,7 @@ script: - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_app" | grep "Up" - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" - - echo 'Wait until test finished (3 minutes)' && sleep 180 + - echo 'Wait until test finished (5 minutes)' && sleep 300 - docker-compose -f docker-compose.${DATABASE}.yml logs "sut" - docker-compose -f docker-compose.${DATABASE}.yml ps "sut" | grep "Exit 0" diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 47a669ca..f8bbe449 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -109,7 +109,6 @@ def test_ocr_read_image_bg(self): # Wait worker completion before moving on in the tests while worker._status == "queued": - print("queue1") time.sleep(5) # Check worker completion and get "new" document after update by bg job @@ -143,7 +142,6 @@ def test_ocr_read_image_bg_pdf(self): # Wait worker completion before moving on in the tests # TODO: Will be better if we can understand how realize producer-consumer pattern while worker._status == "queued": - print("queue2") time.sleep(5) # Check worker completion and get "new" document after update by bg job From 5e39492923e6a1954e327e4366c5670504f1bef1 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 27 Jan 2020 15:24:28 +0300 Subject: [PATCH 25/38] :whale: Add bench doctor Signed-off-by: Emil --- .travis.yml | 2 ++ .../erpnext_ocr/doctype/ocr_language/test_ocr_language.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9c0f3e4f..6b4c753c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,8 @@ script: - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" || exit 1 - echo 'Wait until sites and apps database installed (9-10 minutes)' && travis_wait 15 sleep 720 + - docker-compose exec erpnext_app bench doctor + - docker-compose exec erpnext_app bench enable-scheduler - docker-compose -f docker-compose.${DATABASE}.yml ps - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_db" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_db" | grep "Up" diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.py index da953cba..9a1b225c 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.py @@ -27,7 +27,7 @@ def delete_test_data(): #test_user.remove_roles("System Manager") #test_user.delete() frappe.db.sql("""delete from `tabUser` where email='test_user@example.com'""") # ValidationError without SQL - + frappe.db.sql("""delete from `tabEmail Queue`""") class TestOCRLanguage(unittest.TestCase): def setUp(self): From 5a49a0bd2eae2be1473172377fbf5865a8109dc5 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 27 Jan 2020 17:12:04 +0300 Subject: [PATCH 26/38] :wrench: Enable schedulers Signed-off-by: Emil --- .travis/docker_test.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis/docker_test.sh b/.travis/docker_test.sh index e9e2c7f6..611c3148 100644 --- a/.travis/docker_test.sh +++ b/.travis/docker_test.sh @@ -62,6 +62,8 @@ FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.pro #bench run-tests --help +bench enable-scheduler + echo "Executing Unit Tests of '${FRAPPE_APP_TO_TEST}' app..." if [ "${TEST_VERSION}" = "10" ]; then bench run-tests \ From a4163bec948cb784418f2cec0a2dc55f96bb099e Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 27 Jan 2020 22:44:12 +0300 Subject: [PATCH 27/38] :wrench: Remove exec function from travis yml Signed-off-by: Emil --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b4c753c..9c0f3e4f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,8 +36,6 @@ script: - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" || exit 1 - echo 'Wait until sites and apps database installed (9-10 minutes)' && travis_wait 15 sleep 720 - - docker-compose exec erpnext_app bench doctor - - docker-compose exec erpnext_app bench enable-scheduler - docker-compose -f docker-compose.${DATABASE}.yml ps - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_db" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_db" | grep "Up" From 75f53504e7b1817995298621c3932db6923c80de Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 28 Jan 2020 11:18:28 +0300 Subject: [PATCH 28/38] :wrench: Add bench doctor Signed-off-by: Emil --- .travis/docker_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis/docker_test.sh b/.travis/docker_test.sh index 611c3148..b8959d84 100644 --- a/.travis/docker_test.sh +++ b/.travis/docker_test.sh @@ -63,6 +63,7 @@ FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.pro #bench run-tests --help bench enable-scheduler +bench doctor echo "Executing Unit Tests of '${FRAPPE_APP_TO_TEST}' app..." if [ "${TEST_VERSION}" = "10" ]; then From b326b04588aa4c5042b52d87bbdfa04701e7a2ec Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 28 Jan 2020 11:18:45 +0300 Subject: [PATCH 29/38] :wrench: Add bench doctor before enable scheduler Signed-off-by: Emil --- .travis/docker_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis/docker_test.sh b/.travis/docker_test.sh index b8959d84..3bacb21c 100644 --- a/.travis/docker_test.sh +++ b/.travis/docker_test.sh @@ -62,6 +62,7 @@ FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.pro #bench run-tests --help +bench doctor bench enable-scheduler bench doctor From 333c4307e231a50c9f5de6936a04c6c5bb7953ed Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 28 Jan 2020 12:00:40 +0300 Subject: [PATCH 30/38] :construction: Add tests for OCR Read Signed-off-by: Emil --- .travis/docker_test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.travis/docker_test.sh b/.travis/docker_test.sh index 3bacb21c..c4b5b3fb 100644 --- a/.travis/docker_test.sh +++ b/.travis/docker_test.sh @@ -65,6 +65,14 @@ FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.pro bench doctor bench enable-scheduler bench doctor +echo "Executing Unit tests for Read ocr doctype" +if [ "${TEST_VERSION}" = "10" ]; then + bench run-tests \ + --doctype "OCR Read" \ + --junit-xml-output "${FRAPPE_APP_UNIT_TEST_REPORT}" \ + --profile > "${FRAPPE_APP_UNIT_TEST_PROFILE}" +fi + echo "Executing Unit Tests of '${FRAPPE_APP_TO_TEST}' app..." if [ "${TEST_VERSION}" = "10" ]; then From a1af764a8ab8a4641c77eaf0c8758ba3aa1d4820 Mon Sep 17 00:00:00 2001 From: Emil Date: Tue, 28 Jan 2020 12:39:11 +0300 Subject: [PATCH 31/38] :construction: Mark every tests by print Signed-off-by: Emil --- .travis/docker_test.sh | 8 -------- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 6 ++++++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.travis/docker_test.sh b/.travis/docker_test.sh index c4b5b3fb..3bacb21c 100644 --- a/.travis/docker_test.sh +++ b/.travis/docker_test.sh @@ -65,14 +65,6 @@ FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.pro bench doctor bench enable-scheduler bench doctor -echo "Executing Unit tests for Read ocr doctype" -if [ "${TEST_VERSION}" = "10" ]; then - bench run-tests \ - --doctype "OCR Read" \ - --junit-xml-output "${FRAPPE_APP_UNIT_TEST_REPORT}" \ - --profile > "${FRAPPE_APP_UNIT_TEST_PROFILE}" -fi - echo "Executing Unit Tests of '${FRAPPE_APP_TO_TEST}' app..." if [ "${TEST_VERSION}" = "10" ]; then diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index f8bbe449..56221894 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -94,6 +94,7 @@ def tearDown(self): delete_ocr_reads() def test_ocr_read_image_bg(self): + print("1") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -126,6 +127,7 @@ def test_ocr_read_image_bg(self): self.assertNotIn("And an elephant!", new_doc.read_result) def test_ocr_read_image_bg_pdf(self): + print("2") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -157,6 +159,7 @@ def test_ocr_read_image_bg_pdf(self): self.assertNotIn("Java", new_doc.read_result) def test_ocr_read_image(self): + print("3") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -175,6 +178,7 @@ def test_ocr_read_image(self): self.assertNotIn("And an elephant!", recognized_text) def test_ocr_read_pdf(self): + print("4") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -193,6 +197,7 @@ def test_ocr_read_pdf(self): self.assertNotIn("Java", recognized_text) def test_force_attach_file_doc(self): + print("5") doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -214,6 +219,7 @@ def test_force_attach_file_doc(self): self.assertEqual('/private/files/test.tif', forced_doc.file_to_read) def test_ocr_read_list(self): + print("6") # frappe.set_user("test1@example.com") frappe.set_user("Administrator") res = frappe.get_list("OCR Read", filters=[ From 9ad60644c6e5c9708d57a9ebce42a9ccb6ea82ac Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 00:04:59 +0300 Subject: [PATCH 32/38] :construction: Remove tests Signed-off-by: Emil --- .../doctype/ocr_read/test_ocr_read.py | 128 +++++++++--------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 56221894..0949f230 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -93,70 +93,70 @@ def setUp(self): def tearDown(self): delete_ocr_reads() - def test_ocr_read_image_bg(self): - print("1") - frappe.set_user("Administrator") - doc = frappe.get_doc({ - "doctype": "OCR Read", - "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), - "language": "eng" - }) - - self.assertEqual(None, doc.read_result) - - worker = doc.read_image_bg(is_async=False) - - # Wait worker completion before moving on in the tests - while worker._status == "queued": - time.sleep(5) - - # Check worker completion and get "new" document after update by bg job - self.assertEqual(worker._status, "finished") - new_doc = frappe.get_doc("OCR Read", - {"file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample1.jpg"), - "language": "eng"}) - - self.assertEqual(new_doc.read_result, doc.read_result) - self.assertIn("The quick brown fox", new_doc.read_result) - self.assertIn("jumped over the 5", new_doc.read_result) - self.assertIn("lazy dogs!", new_doc.read_result) - self.assertNotIn("And an elephant!", new_doc.read_result) - - def test_ocr_read_image_bg_pdf(self): - print("2") - frappe.set_user("Administrator") - doc = frappe.get_doc({ - "doctype": "OCR Read", - "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample2.pdf"), - "language": "eng" - }) - - self.assertEqual(None, doc.read_result) - - worker = doc.read_image_bg(is_async=False) - - # Wait worker completion before moving on in the tests - # TODO: Will be better if we can understand how realize producer-consumer pattern - while worker._status == "queued": - time.sleep(5) - - # Check worker completion and get "new" document after update by bg job - self.assertEqual(worker._status, "finished") - new_doc = frappe.get_doc("OCR Read", { - "file_to_read": os.path.join(os.path.dirname(__file__), - os.path.pardir, os.path.pardir, os.path.pardir, - "tests", "test_data", "sample2.pdf"), - "language": "eng"}) - - self.assertEqual(new_doc.read_result, doc.read_result) - self.assertIn("Python Basics", new_doc.read_result) - self.assertNotIn("Java", new_doc.read_result) + # def test_ocr_read_image_bg(self): + # print("1") + # frappe.set_user("Administrator") + # doc = frappe.get_doc({ + # "doctype": "OCR Read", + # "file_to_read": os.path.join(os.path.dirname(__file__), + # os.path.pardir, os.path.pardir, os.path.pardir, + # "tests", "test_data", "sample1.jpg"), + # "language": "eng" + # }) + # + # self.assertEqual(None, doc.read_result) + # + # worker = doc.read_image_bg(is_async=False) + # + # # Wait worker completion before moving on in the tests + # while worker._status == "queued": + # time.sleep(5) + # + # # Check worker completion and get "new" document after update by bg job + # self.assertEqual(worker._status, "finished") + # new_doc = frappe.get_doc("OCR Read", + # {"file_to_read": os.path.join(os.path.dirname(__file__), + # os.path.pardir, os.path.pardir, os.path.pardir, + # "tests", "test_data", "sample1.jpg"), + # "language": "eng"}) + # + # self.assertEqual(new_doc.read_result, doc.read_result) + # self.assertIn("The quick brown fox", new_doc.read_result) + # self.assertIn("jumped over the 5", new_doc.read_result) + # self.assertIn("lazy dogs!", new_doc.read_result) + # self.assertNotIn("And an elephant!", new_doc.read_result) + + # def test_ocr_read_image_bg_pdf(self): + # print("2") + # frappe.set_user("Administrator") + # doc = frappe.get_doc({ + # "doctype": "OCR Read", + # "file_to_read": os.path.join(os.path.dirname(__file__), + # os.path.pardir, os.path.pardir, os.path.pardir, + # "tests", "test_data", "sample2.pdf"), + # "language": "eng" + # }) + # + # self.assertEqual(None, doc.read_result) + # + # worker = doc.read_image_bg(is_async=False) + # + # # Wait worker completion before moving on in the tests + # # TODO: Will be better if we can understand how realize producer-consumer pattern + # while worker._status == "queued": + # time.sleep(5) + # + # # Check worker completion and get "new" document after update by bg job + # self.assertEqual(worker._status, "finished") + # new_doc = frappe.get_doc("OCR Read", { + # "file_to_read": os.path.join(os.path.dirname(__file__), + # os.path.pardir, os.path.pardir, os.path.pardir, + # "tests", "test_data", "sample2.pdf"), + # "language": "eng"}) + # + # self.assertEqual(new_doc.read_result, doc.read_result) + # self.assertIn("Python Basics", new_doc.read_result) + # self.assertNotIn("Java", new_doc.read_result) def test_ocr_read_image(self): print("3") From 3b12e65845ef14b3b159aa0ef34f2c43cb101e2f Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 01:11:38 +0300 Subject: [PATCH 33/38] :construction: Add getter instead of variable Signed-off-by: Emil --- .../doctype/ocr_read/test_ocr_read.py | 132 +++++++++--------- 1 file changed, 64 insertions(+), 68 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 0949f230..3c76ee07 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -93,73 +93,72 @@ def setUp(self): def tearDown(self): delete_ocr_reads() - # def test_ocr_read_image_bg(self): - # print("1") - # frappe.set_user("Administrator") - # doc = frappe.get_doc({ - # "doctype": "OCR Read", - # "file_to_read": os.path.join(os.path.dirname(__file__), - # os.path.pardir, os.path.pardir, os.path.pardir, - # "tests", "test_data", "sample1.jpg"), - # "language": "eng" - # }) - # - # self.assertEqual(None, doc.read_result) - # - # worker = doc.read_image_bg(is_async=False) - # - # # Wait worker completion before moving on in the tests - # while worker._status == "queued": - # time.sleep(5) - # - # # Check worker completion and get "new" document after update by bg job - # self.assertEqual(worker._status, "finished") - # new_doc = frappe.get_doc("OCR Read", - # {"file_to_read": os.path.join(os.path.dirname(__file__), - # os.path.pardir, os.path.pardir, os.path.pardir, - # "tests", "test_data", "sample1.jpg"), - # "language": "eng"}) - # - # self.assertEqual(new_doc.read_result, doc.read_result) - # self.assertIn("The quick brown fox", new_doc.read_result) - # self.assertIn("jumped over the 5", new_doc.read_result) - # self.assertIn("lazy dogs!", new_doc.read_result) - # self.assertNotIn("And an elephant!", new_doc.read_result) - - # def test_ocr_read_image_bg_pdf(self): - # print("2") - # frappe.set_user("Administrator") - # doc = frappe.get_doc({ - # "doctype": "OCR Read", - # "file_to_read": os.path.join(os.path.dirname(__file__), - # os.path.pardir, os.path.pardir, os.path.pardir, - # "tests", "test_data", "sample2.pdf"), - # "language": "eng" - # }) - # - # self.assertEqual(None, doc.read_result) - # - # worker = doc.read_image_bg(is_async=False) - # - # # Wait worker completion before moving on in the tests - # # TODO: Will be better if we can understand how realize producer-consumer pattern - # while worker._status == "queued": - # time.sleep(5) - # - # # Check worker completion and get "new" document after update by bg job - # self.assertEqual(worker._status, "finished") - # new_doc = frappe.get_doc("OCR Read", { - # "file_to_read": os.path.join(os.path.dirname(__file__), - # os.path.pardir, os.path.pardir, os.path.pardir, - # "tests", "test_data", "sample2.pdf"), - # "language": "eng"}) - # - # self.assertEqual(new_doc.read_result, doc.read_result) - # self.assertIn("Python Basics", new_doc.read_result) - # self.assertNotIn("Java", new_doc.read_result) + def test_ocr_read_image_bg(self): + frappe.set_user("Administrator") + doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng" + }) + + self.assertEqual(None, doc.read_result) + + worker = doc.read_image_bg(is_async=False) + + # Wait worker completion before moving on in the tests + while worker.get_status() == "queued": + time.sleep(5) + print(worker) + + # Check worker completion and get "new" document after update by bg job + self.assertEqual(worker.get_status(), "finished") + new_doc = frappe.get_doc("OCR Read", + {"file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample1.jpg"), + "language": "eng"}) + + self.assertEqual(new_doc.read_result, doc.read_result) + self.assertIn("The quick brown fox", new_doc.read_result) + self.assertIn("jumped over the 5", new_doc.read_result) + self.assertIn("lazy dogs!", new_doc.read_result) + self.assertNotIn("And an elephant!", new_doc.read_result) + + def test_ocr_read_image_bg_pdf(self): + frappe.set_user("Administrator") + doc = frappe.get_doc({ + "doctype": "OCR Read", + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), + "language": "eng" + }) + + self.assertEqual(None, doc.read_result) + + worker = doc.read_image_bg(is_async=False) + + # Wait worker completion before moving on in the tests + # TODO: Will be better if we can understand how realize producer-consumer pattern + while worker.get_status() == "queued": + time.sleep(5) + print(worker.__dict__) + + # Check worker completion and get "new" document after update by bg job + self.assertEqual(worker.get_status(), "finished") + new_doc = frappe.get_doc("OCR Read", { + "file_to_read": os.path.join(os.path.dirname(__file__), + os.path.pardir, os.path.pardir, os.path.pardir, + "tests", "test_data", "sample2.pdf"), + "language": "eng"}) + + self.assertEqual(new_doc.read_result, doc.read_result) + self.assertIn("Python Basics", new_doc.read_result) + self.assertNotIn("Java", new_doc.read_result) def test_ocr_read_image(self): - print("3") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -178,7 +177,6 @@ def test_ocr_read_image(self): self.assertNotIn("And an elephant!", recognized_text) def test_ocr_read_pdf(self): - print("4") frappe.set_user("Administrator") doc = frappe.get_doc({ "doctype": "OCR Read", @@ -197,7 +195,6 @@ def test_ocr_read_pdf(self): self.assertNotIn("Java", recognized_text) def test_force_attach_file_doc(self): - print("5") doc = frappe.get_doc({ "doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), @@ -219,7 +216,6 @@ def test_force_attach_file_doc(self): self.assertEqual('/private/files/test.tif', forced_doc.file_to_read) def test_ocr_read_list(self): - print("6") # frappe.set_user("test1@example.com") frappe.set_user("Administrator") res = frappe.get_list("OCR Read", filters=[ From a17948f4e9ec0ffd9a6aa40064ecb3f3731ceb2c Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 02:46:22 +0300 Subject: [PATCH 34/38] :construction: Logging tests and requeueing jobs Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 3c76ee07..0cce7d0c 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -38,6 +38,7 @@ # } for file_to_read, language in docs]) # # return test_objects +from rq import requeue_job def create_ocr_reads(): @@ -110,9 +111,11 @@ def test_ocr_read_image_bg(self): # Wait worker completion before moving on in the tests while worker.get_status() == "queued": time.sleep(5) - print(worker) # Check worker completion and get "new" document after update by bg job + if worker.get_status() == "failed": + requeue_job(worker.get_id()) + print(worker.__dict__) self.assertEqual(worker.get_status(), "finished") new_doc = frappe.get_doc("OCR Read", {"file_to_read": os.path.join(os.path.dirname(__file__), @@ -144,7 +147,11 @@ def test_ocr_read_image_bg_pdf(self): # TODO: Will be better if we can understand how realize producer-consumer pattern while worker.get_status() == "queued": time.sleep(5) - print(worker.__dict__) + + if worker.get_status() == "failed": + requeue_job(worker.get_id()) + time.sleep(5) + print(worker.__dict__) # Check worker completion and get "new" document after update by bg job self.assertEqual(worker.get_status(), "finished") From 630b2e530242d0896aeb926d3c3dc8a62c118309 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 12:18:50 +0300 Subject: [PATCH 35/38] :construction: print values of worker Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 0cce7d0c..745c7422 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -114,8 +114,8 @@ def test_ocr_read_image_bg(self): # Check worker completion and get "new" document after update by bg job if worker.get_status() == "failed": - requeue_job(worker.get_id()) - print(worker.__dict__) + print(worker.__dict__) + print(doc.read_result) self.assertEqual(worker.get_status(), "finished") new_doc = frappe.get_doc("OCR Read", {"file_to_read": os.path.join(os.path.dirname(__file__), From 5278ed38891af28d153b12160bdcd27ab6af9711 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 13:33:36 +0300 Subject: [PATCH 36/38] :white_check_mark: Remove while construction and add now parameter Signed-off-by: Emil --- .../erpnext_ocr/doctype/ocr_read/ocr_read.py | 4 ++-- .../doctype/ocr_read/test_ocr_read.py | 22 ++----------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index f0f6717a..bde47567 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -53,9 +53,9 @@ def __init__(self, *args, **kwargs): def read_image(self): return read_ocr(self) - def read_image_bg(self, is_async=True): + def read_image_bg(self, is_async=True,now=False): return frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_ocr", queue="long", - timeout=1500, is_async=is_async, **{'obj': self}) + timeout=1500, is_async=is_async, now=now, **{'obj': self}) @frappe.whitelist() diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 745c7422..25266bb2 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -106,17 +106,11 @@ def test_ocr_read_image_bg(self): self.assertEqual(None, doc.read_result) - worker = doc.read_image_bg(is_async=False) + doc.read_image_bg(is_async=False, now=True) # Wait worker completion before moving on in the tests - while worker.get_status() == "queued": - time.sleep(5) # Check worker completion and get "new" document after update by bg job - if worker.get_status() == "failed": - print(worker.__dict__) - print(doc.read_result) - self.assertEqual(worker.get_status(), "finished") new_doc = frappe.get_doc("OCR Read", {"file_to_read": os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir, @@ -141,20 +135,8 @@ def test_ocr_read_image_bg_pdf(self): self.assertEqual(None, doc.read_result) - worker = doc.read_image_bg(is_async=False) + doc.read_image_bg(is_async=False, now=True) - # Wait worker completion before moving on in the tests - # TODO: Will be better if we can understand how realize producer-consumer pattern - while worker.get_status() == "queued": - time.sleep(5) - - if worker.get_status() == "failed": - requeue_job(worker.get_id()) - time.sleep(5) - print(worker.__dict__) - - # Check worker completion and get "new" document after update by bg job - self.assertEqual(worker.get_status(), "finished") new_doc = frappe.get_doc("OCR Read", { "file_to_read": os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir, From db2bc2d15c611a58e519d59f16baf71b466ad530 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 29 Jan 2020 13:35:54 +0300 Subject: [PATCH 37/38] :lipstick: Remove old libraries Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py | 2 +- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index bde47567..4ffd8a37 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -53,7 +53,7 @@ def __init__(self, *args, **kwargs): def read_image(self): return read_ocr(self) - def read_image_bg(self, is_async=True,now=False): + def read_image_bg(self, is_async=True, now=False): return frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_ocr", queue="long", timeout=1500, is_async=is_async, now=now, **{'obj': self}) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 25266bb2..6d15976a 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals -import time import frappe import unittest @@ -38,7 +37,6 @@ # } for file_to_read, language in docs]) # # return test_objects -from rq import requeue_job def create_ocr_reads(): From f75dd9bbc5daebe9a01fb7192965ccc6a2e70390 Mon Sep 17 00:00:00 2001 From: Emil Date: Mon, 3 Feb 2020 14:58:07 +0300 Subject: [PATCH 38/38] :white_check_mark: Add test for Validation Error Signed-off-by: Emil --- erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py index 6d15976a..df1a7ed7 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/test_ocr_read.py @@ -10,7 +10,7 @@ import unittest import os -from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc +from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import force_attach_file_doc, read_ocr # TODO Frappe default test records creation @@ -215,3 +215,6 @@ def test_ocr_read_list(self): self.assertTrue(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir, "tests", "test_data", "sample2.pdf") in files_to_read) + + def test_read_ocr(self): + self.assertRaises(frappe.ValidationError, read_ocr, obj=None) \ No newline at end of file