Skip to content

Commit

Permalink
logging improvements and removal of preview generation (#244)
Browse files Browse the repository at this point in the history
  • Loading branch information
ciur authored Nov 2, 2023
1 parent 8d74fca commit 9886804
Show file tree
Hide file tree
Showing 13 changed files with 57 additions and 174 deletions.
12 changes: 7 additions & 5 deletions docker/dev/logging.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ handlers:
formatter: verbose

loggers:
auth_server:
level: DEBUG
handlers: [console]
papermerge.search.tasks:
level: DEBUG
celery:
level: INFO
handlers: [ console ]
propagate: no
format: verbose
papermerge.core:
level: INFO
handlers: [console]
propagate: no
format: verbose
28 changes: 0 additions & 28 deletions papermerge/conf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@

DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'

# Custom signal processor handles connections errors (to elasticsearch)
# and reports them as warnings. This way, even when no connection to ES
# is available, documents, folders, pages etc can still be used
ELASTICSEARCH_DSL_SIGNAL_PROCESSOR = 'papermerge.search.signals.CustomSignalProcessor' # noqa

MEDIA_URL = config.get(
'media',
'url',
Expand Down Expand Up @@ -226,27 +221,4 @@
},
}

SEARCH_ENGINES_MAP = {
'elastic': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'elastic7': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'elasticsearch7': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'elasticsearch': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'es7': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'es': 'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine',
'solr': 'haystack.backends.solr_backend.SolrEngine',
'whoosh': 'haystack.backends.whoosh_backend.WhooshEngine',
'xapian': 'xapian_backend.XapianEngine',
}

HAYSTACK_DOCUMENT_FIELD = 'indexed_content'

search_engine = config.get('search', 'engine', default='xapian')


HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': SEARCH_ENGINES_MAP[search_engine],
},
}

HAYSTACK_SIGNAL_PROCESSOR = 'papermerge.search.signals.SignalProcessor'
18 changes: 0 additions & 18 deletions papermerge/core/models/document_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from django.utils.translation import gettext_lazy as _

from papermerge.core.pathlib import abs_docver_path
from papermerge.core.storage import abs_path
from papermerge.core.utils import image as image_utils

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -74,22 +72,6 @@ class Meta:
def __repr__(self):
return f"DocumentVersion(id={self.pk}, number={self.number})"

def generate_previews(self, page_number=None):
logger.debug('generate_previews BEGIN')
abs_dirname = abs_path(self.document_path.dirname_sidecars())

if page_number is None:
number = 1
else:
number = page_number

image_utils.generate_preview(
pdf_path=Path(abs_path(self.document_path.url)),
output_folder=Path(abs_dirname),
size=900,
page_number=number
)

@property
def download_url(self):
return f"/api/document-versions/{self.id}/download"
Expand Down
12 changes: 1 addition & 11 deletions papermerge/core/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .signal_definitions import document_post_upload
from .tasks import delete_user_data as delete_user_data_task
from .tasks import generate_page_previews_task, ocr_document_task
from .tasks import ocr_document_task

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -197,16 +197,6 @@ def worker_shutdown(**_):
file.unlink()


@receiver(document_post_upload, sender=Document)
def generate_page_previews(
sender,
document_version: DocumentVersion,
**_
):
"""Generates page previews"""
generate_page_previews_task.delay(str(document_version.id))


@receiver(document_post_upload, sender=Document)
def receiver_document_post_upload(
sender,
Expand Down
13 changes: 0 additions & 13 deletions papermerge/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,9 @@ def _post_ocr_document(
)
update_document_pages(document_id)

# generate previews for newly created document version (which has OCR)
doc = Document.objects.get(pk=document_id)
doc_version = doc.versions.last()

generate_page_previews_task.delay(str(doc_version.id))

return document_id


@shared_task
def generate_page_previews_task(document_version_id):
document_version = DocumentVersion.objects.get(id=document_version_id)
document_version.generate_previews()
return document_version_id


def increment_document_version(
document_id,
target_docver_uuid: UUID,
Expand Down
6 changes: 2 additions & 4 deletions tests/core/models/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ def test_idified_title_multiple_dots_in_title(self):
)

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_upload_payload_to_zero_sized_document(self, _x, _y):
def test_upload_payload_to_zero_sized_document(self, _x):
"""
Upon creation document model has associated zero sized document_version
i.e. document_version.size == 0.
Expand Down Expand Up @@ -155,8 +154,7 @@ def test_upload_payload_to_zero_sized_document(self, _x, _y):
)

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_version_bump_from_pages(self, _1, _2):
def test_version_bump_from_pages(self, _):
"""
Move two pages from source document to destination document
"""
Expand Down
5 changes: 2 additions & 3 deletions tests/core/models/test_document_version.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import io

from papermerge.test import TestCase
from papermerge.core.models import (User, Document)
from papermerge.test import maker
from papermerge.core.models import Document, User
from papermerge.test import TestCase, maker


class TestDocumentVersionModel(TestCase):
Expand Down
24 changes: 8 additions & 16 deletions tests/core/test_page_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ def test_copy_text_field():

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_apply_pages_op(_, __):
def test_apply_pages_op(_):
"""This test checks if `apply_pages_op` correctly copies
page text data"""
user = user_recipe.make()
Expand Down Expand Up @@ -80,8 +79,7 @@ def test_apply_pages_op(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_move_pages_one_single_page_strategy_mix(_, __):
def test_move_pages_one_single_page_strategy_mix(_):
"""Scenario tests moving of one page from
src to dst (strategy: mix). Scenario is illustrated
in following table:
Expand Down Expand Up @@ -171,8 +169,7 @@ def test_move_pages_one_single_page_strategy_mix(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_move_pages_two_pages_strategy_mix(_, __):
def test_move_pages_two_pages_strategy_mix(_):
"""Scenario tests moving of two pages from
src to dst (strategy: mix). Scenario is illustrated
in following table:
Expand Down Expand Up @@ -266,8 +263,7 @@ def test_move_pages_two_pages_strategy_mix(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_move_pages_one_single_page_strategy_replace(_, __):
def test_move_pages_one_single_page_strategy_replace(_):
"""Scenario tests moving of one page from
src to dst (strategy: replace). Scenario is illustrated
in following table:
Expand Down Expand Up @@ -338,8 +334,7 @@ def test_move_pages_one_single_page_strategy_replace(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_move_all_pages_of_the_doc_out_mix(_, __):
def test_move_all_pages_of_the_doc_out_mix(_):
"""Scenario tests moving of ALL page of source document.
In this case source document will be entirely deleted:
Expand Down Expand Up @@ -440,8 +435,7 @@ def test_move_all_pages_of_the_doc_out_mix(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_move_all_pages_of_the_doc_out_replace_strategy(_, __):
def test_move_all_pages_of_the_doc_out_replace_strategy(_):
"""Scenario tests moving of ALL page of source document.
In this case source document will be entirely deleted:
Expand Down Expand Up @@ -521,8 +515,7 @@ def test_move_all_pages_of_the_doc_out_replace_strategy(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_extract_two_pages_to_folder_all_pages_in_one_doc(_, __):
def test_extract_two_pages_to_folder_all_pages_in_one_doc(_):
"""Scenario tests extraction of first two pages from
source document into destination folder.
Both pages are extracted into one single destination document.
Expand Down Expand Up @@ -585,8 +578,7 @@ def test_extract_two_pages_to_folder_all_pages_in_one_doc(_, __):

@pytest.mark.django_db
@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_extract_two_pages_to_folder_each_page_in_separate_doc(_, __):
def test_extract_two_pages_to_folder_each_page_in_separate_doc(_):
"""Scenario tests extraction of first two pages of source document
into destination folder. Each page is extracted into
a separate document, as result to new documents are created.
Expand Down
18 changes: 6 additions & 12 deletions tests/core/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,8 +682,7 @@ class TestRemovePdfPages(TestCase):
"""Tests for remove_pdf_pages"""

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_remove_pdf_pages_basic_1(self, _, _x):
def test_remove_pdf_pages_basic_1(self, _):
"""Remove one page from the document version"""
src_document = maker.document(
"s3.pdf",
Expand All @@ -702,8 +701,7 @@ def test_remove_pdf_pages_basic_1(self, _, _x):
assert content == "S2 S3"

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_remove_pdf_pages_basic_2(self, _, _x):
def test_remove_pdf_pages_basic_2(self, _):
"""Remove last two pages from the document version"""
src_document = maker.document(
"s3.pdf",
Expand All @@ -722,8 +720,7 @@ def test_remove_pdf_pages_basic_2(self, _, _x):
assert content == "S1"

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_remove_pdf_pages_invalid_input(self, _, _x):
def test_remove_pdf_pages_invalid_input(self, _):
"""Junk page_numbers input"""
src_document = maker.document(
"s3.pdf",
Expand Down Expand Up @@ -751,8 +748,7 @@ class TestInserPdfPagesUtilityFunction(TestCase):
"""Tests for insert_pdf_pages"""

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_insert_pdf_pages_basic_1(self, _, _x):
def test_insert_pdf_pages_basic_1(self, _):
"""
We test moving of one page from source document version to
destination document version.
Expand Down Expand Up @@ -788,8 +784,7 @@ def test_insert_pdf_pages_basic_1(self, _, _x):
assert "S1 D1 D2 D3" == dst_new_content

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_insert_pdf_pages_basic_2(self, _, _x):
def test_insert_pdf_pages_basic_2(self, _):
"""
We test moving of two pages from source document version to
destination document version.
Expand Down Expand Up @@ -825,8 +820,7 @@ def test_insert_pdf_pages_basic_2(self, _, _x):
assert "D1 S1 S3 D2 D3" == dst_new_content

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_insert_pdf_pages_when_dst_old_is_None(self, _, _x):
def test_insert_pdf_pages_when_dst_old_is_None(self, _):
"""
We test moving of two pages from source document version to
destination document version with dst_old_version=None.
Expand Down
6 changes: 2 additions & 4 deletions tests/core/views/test_document_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ def setUp(self):
self.client_john.force_authenticate(user=self.john)

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_only_owner_can_download_document_version(self, _1, _2):
def test_only_owner_can_download_document_version(self, _1):
"""
Assert that if user is not the owner of the document he/she won't
be able to download document versions
Expand All @@ -67,8 +66,7 @@ def test_only_owner_can_download_document_version(self, _1, _2):
assert response.status_code == 403

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_download_document_version(self, _1, _2):
def test_download_document_version(self, _1):
"""Asserts that document version download works"""
doc = maker.document(
"s3.pdf",
Expand Down
18 changes: 6 additions & 12 deletions tests/core/views/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ def setUp(self):
shutil.rmtree(self.media / 'sidecars', ignore_errors=True)

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_document_upload_view(self, _x, _y):
def test_document_upload_view(self, _x):
doc = Document.objects.create_document(
title="three-pages.pdf",
lang="deu",
Expand Down Expand Up @@ -123,8 +122,7 @@ def test_rename_document(self):
class DocumentOcrTextViewTest(PapermergeTestCase):

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_basic_get_document_ocr_text(self, _x, _y):
def test_basic_get_document_ocr_text(self, _):
"""
Retrieve OCRed text from all pages of given document's latest version
"""
Expand All @@ -144,8 +142,7 @@ def test_basic_get_document_ocr_text(self, _x, _y):
assert response.data['text'] == 'S1 S2 S3'

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_get_document_ocr_text_of_pages_2_and_3(self, _x, _y):
def test_get_document_ocr_text_of_pages_2_and_3(self, _x):
"""
Retrieve OCRed text from given pages of given document's latest version
Expand Down Expand Up @@ -176,8 +173,7 @@ def test_get_document_ocr_text_of_pages_2_and_3(self, _x, _y):
assert response.data['text'] == 'S2 S3'

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_get_document_ocr_text_of_page_1(self, _x, _y):
def test_get_document_ocr_text_of_page_1(self, _x):
"""
Retrieve OCRed text from given pages of given document's latest version
Expand Down Expand Up @@ -205,8 +201,7 @@ def test_get_document_ocr_text_of_page_1(self, _x, _y):
assert response.data['text'] == 'S1'

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_get_document_junk_input(self, _x, _y):
def test_get_document_junk_input(self, _x):
"""
Make sure there are no exception on junk input
"""
Expand All @@ -233,8 +228,7 @@ def test_get_document_junk_input(self, _x, _y):
assert response.data['text'] == 'S1 S2 S3'

@patch('papermerge.core.signals.ocr_document_task')
@patch('papermerge.core.signals.generate_page_previews_task')
def test_get_document_ocr_text_filter_by_page_ids(self, _x, _y):
def test_get_document_ocr_text_filter_by_page_ids(self, _x):
"""
Retrieve OCRed text from given pages of given document's latest version
Expand Down
Loading

0 comments on commit 9886804

Please sign in to comment.