From c3cba0a329cccddd74eb8c8b4c24c02ef7121031 Mon Sep 17 00:00:00 2001 From: Eugen Ciur Date: Sat, 4 Nov 2023 08:17:54 +0100 Subject: [PATCH] Sync search index with page management (#246) --- papermerge/core/constants.py | 1 + papermerge/core/models/page.py | 4 - papermerge/core/page_ops.py | 92 ++- papermerge/search/schema.py | 2 + papermerge/search/tasks.py | 157 ++-- tests/core/test_page_ops.py | 54 +- tests/core/views/test_groups.py | 27 - tests/core/views/test_pages.py | 1079 -------------------------- tests/core/views/test_permissions.py | 35 - tests/core/views/test_preferences.py | 17 - 10 files changed, 236 insertions(+), 1232 deletions(-) delete mode 100644 tests/core/views/test_groups.py delete mode 100644 tests/core/views/test_pages.py delete mode 100644 tests/core/views/test_permissions.py delete mode 100644 tests/core/views/test_preferences.py diff --git a/papermerge/core/constants.py b/papermerge/core/constants.py index 03cf75ec6..644d47541 100644 --- a/papermerge/core/constants.py +++ b/papermerge/core/constants.py @@ -7,6 +7,7 @@ DEFAULT_TAG_BG_COLOR = '#c41fff' DEFAULT_TAG_FG_COLOR = '#ffffff' INDEX_ADD_NODE = 'index_add_node' +INDEX_ADD_DOCS = 'index_add_docs' INDEX_ADD_PAGES = 'index_add_pages' INDEX_REMOVE_NODE = 'index_remove_node' INDEX_UPDATE = 'index_update' diff --git a/papermerge/core/models/page.py b/papermerge/core/models/page.py index a8a7cc830..0df3eab27 100644 --- a/papermerge/core/models/page.py +++ b/papermerge/core/models/page.py @@ -134,10 +134,6 @@ def update_text_field(self, stream: io.StringIO): Returns text read from IO stream """ - logger.debug( - 'update_text_field:' - f'len(page.stripped_text)=={len(self.stripped_text)}' - ) self.text = stream.read() self.save() diff --git a/papermerge/core/page_ops.py b/papermerge/core/page_ops.py index ac55454bf..c2066c4f2 100644 --- a/papermerge/core/page_ops.py +++ b/papermerge/core/page_ops.py @@ -8,7 +8,7 @@ from celery import current_app from pikepdf import Pdf -from papermerge.core.constants import INDEX_UPDATE +from papermerge.core.constants import INDEX_ADD_DOCS, INDEX_UPDATE from papermerge.core.models import Document, Folder, Page from papermerge.core.models.utils import OCR_STATUS_SUCCEEDED from papermerge.core.pathlib import abs_page_path @@ -46,8 +46,8 @@ def apply_pages_op(items: List[PageAndRotOp]) -> List[PyDocVer]: ) notify_index_update( - remove_page_ids=[str(p.id) for p in old_version.pages.all()], - add_page_ids=[str(p.id) for p in new_version.pages.all()] + remove_ver_id=str(old_version.id), + add_ver_id=str(new_version.id) ) return doc.versions.all() @@ -185,7 +185,6 @@ def copy_text_field( # list of old_version page numbers page_numbers=page_numbers ) - # updates page.text fields and document_version.text field dst.update_text_field(streams) @@ -251,7 +250,7 @@ def move_pages_mix( src_old_version, src_new_version, moved_pages_count - ] = copy_pages(source_page_ids) + ] = copy_without_pages(source_page_ids) moved_pages = Page.objects.filter(pk__in=source_page_ids) moved_page_ids = [page.id for page in moved_pages] @@ -302,6 +301,10 @@ def move_pages_mix( src_old_version.document.delete() return [None, dst_new_version.document] + notify_index_update( + add_ver_id=str(dst_new_version.id), + remove_ver_id=str(dst_old_version.id) + ) return [src_new_version.document, dst_new_version.document] @@ -321,7 +324,7 @@ def move_pages_replace( src_old_version, src_new_version, moved_pages_count - ] = copy_pages(source_page_ids) + ] = copy_without_pages(source_page_ids) moved_pages = 
Page.objects.filter(pk__in=source_page_ids)
     moved_page_ids = [page.id for page in moved_pages]
 
@@ -357,6 +360,10 @@ def move_pages_replace(
 
         src_old_version.document.delete()
         return [None, dst_new_version.document]
 
+    notify_index_update(
+        add_ver_id=str(dst_new_version.id),
+        remove_ver_id=str(dst_old_version.id)
+    )
     return [src_new_version.document, dst_new_version.document]
 
 
@@ -376,7 +383,7 @@ def extract_pages(
         old_doc_ver,
         new_doc_ver,
         moved_pages_count
-    ] = copy_pages(source_page_ids)
+    ] = copy_without_pages(source_page_ids)
 
     if strategy == ExtractStrategy.ONE_PAGE_PER_DOC:
         new_docs = extract_to_single_paged_docs(
@@ -398,6 +405,13 @@ def extract_pages(
     else:
         target_docs = new_docs
 
+    for doc in target_docs:
+        logger.debug(
+            f"Notifying index to add doc.title={doc.title} doc.id={doc.id}"
+        )
+        logger.debug(f"Doc last version={doc.versions.last()}")
+    notify_index_add_docs([str(doc.id) for doc in target_docs])
+
     if old_doc_ver.pages.count() == moved_pages_count:
         # all source pages were extracted, document should
         # be deleted as its last version does not contain
@@ -413,7 +427,11 @@ def extract_to_single_paged_docs(
     target_folder_id: uuid.UUID,
     title_format: str
 ) -> List[Document]:
+    """Extracts given pages into separate documents
+    Each source page will end up in a separate document
+    located in the target folder.
+    """
     pages = Page.objects.filter(pk__in=source_page_ids)
     dst_folder = Folder.objects.get(pk=target_folder_id)
     result = []
@@ -437,6 +455,12 @@ def extract_to_single_paged_docs(
             target_ids=[doc_version.pages.first().id]
        )
 
+        copy_text_field(
+            src=pages.first().document_version,
+            dst=doc_version,
+            page_numbers=[page.number]
+        )
+
     return result
 
 
@@ -445,6 +469,11 @@ def extract_to_multi_paged_doc(
     target_folder_id: uuid.UUID,
     title_format: str
 ) -> Document:
+    """Extracts given pages into a single document
+
+    All source pages will end up in a single document
+    located in the target folder.
+    """
     title = f'{title_format}-{uuid.uuid4()}.pdf'
     pages = Page.objects.filter(pk__in=source_page_ids)
 
@@ -466,21 +495,27 @@ def extract_to_multi_paged_doc(
         target_ids=[page.id for page in dst_version.pages.order_by('number')]
     )
 
+    copy_text_field(
+        src=pages.first().document_version,
+        dst=dst_version,
+        page_numbers=[p.number for p in pages]
+    )
+
     return new_doc
 
 
-def copy_pages(
+def copy_without_pages(
     page_ids: List[uuid.UUID]
 ) -> [PyDocVer, PyDocVer, int]:
-    """Copy pages from src doc version to dst doc version
+    """Copy all pages WHICH ARE NOT in `page_ids` list from src to dst
 
     All pages are assumed to be from same source document version.
-    Source document version is the doc ver of the first page
-    (again, all pages are assumed to be part of same doc ver).
-    The destination doc ver is created. All pages referenced
-    by IDs are copied into newly created destination doc version.
+    Source is the document version of the first page.
+    Destination will be created as a new document version.
+    Destination will have all source pages WHICH ARE NOT in the `page_ids` list.
 
-    The OCR data/page folder is copied along (reused).
+    The OCR data/page folder is reused.
+
+    Also sends an INDEX_UPDATE notification.
""" moved_pages = Page.objects.filter(pk__in=page_ids) moved_page_ids = [page.id for page in moved_pages] @@ -503,7 +538,7 @@ def copy_pages( src_keys = [ # IDs of the pages which were not removed page.id for page in src_old_version.pages.order_by('number') - if not (page.id in moved_page_ids) + if not (page.id in moved_page_ids) # Notice the negation ] dst_values = [ @@ -516,6 +551,21 @@ def copy_pages( f"Pages with IDs {not_copied_ids} do not have OCR data" ) + copy_text_field( + src=src_old_version, + dst=src_new_version, + page_numbers=[ + p.number + for p in src_old_version.pages.all() + if not (p.id in moved_page_ids) # Notice the negation + ] + ) + + notify_index_update( + remove_ver_id=str(src_old_version.id), + add_ver_id=str(src_new_version.id) + ) + return [ src_old_version, # orig. ver where pages were copied from src_new_version, # ver where pages were copied to @@ -525,8 +575,14 @@ def copy_pages( @skip_in_tests def notify_index_update( - add_page_ids: List[str], - remove_page_ids: List[str] + add_ver_id: str, + remove_ver_id: str ): """Sends tasks to the index to remove/add pages""" - current_app.send_task(INDEX_UPDATE, (add_page_ids, remove_page_ids)) + current_app.send_task(INDEX_UPDATE, (add_ver_id, remove_ver_id)) + + +@skip_in_tests +def notify_index_add_docs(add_doc_ids: List[str]): + logger.debug(f"Sending task {INDEX_ADD_DOCS} with {add_doc_ids}") + current_app.send_task(INDEX_ADD_DOCS, (add_doc_ids, )) diff --git a/papermerge/search/schema.py b/papermerge/search/schema.py index cc36c3b4f..ad3c76019 100644 --- a/papermerge/search/schema.py +++ b/papermerge/search/schema.py @@ -98,6 +98,8 @@ class Model(Schema): def __str__(self): return f'IndexEntity(id={self.id}, title={self.title}, '\ f'document_id={self.document_id},' \ + f'number={self.page_number},' \ + f'text=|{self.text}|,' \ f'type={self.entity_type})' def get_idx_value__tags(self): diff --git a/papermerge/search/tasks.py b/papermerge/search/tasks.py index bcda64b08..6f81a2277 100644 --- a/papermerge/search/tasks.py +++ b/papermerge/search/tasks.py @@ -6,15 +6,32 @@ from django.conf import settings from salinic import IndexRW, create_engine -from papermerge.core.constants import (INDEX_ADD_NODE, INDEX_ADD_PAGES, - INDEX_REMOVE_NODE, INDEX_UPDATE) -from papermerge.core.models import BaseTreeNode, DocumentVersion, Page +from papermerge.core import constants +from papermerge.core.models import (BaseTreeNode, Document, DocumentVersion, + Page) from papermerge.core.tasks import ocr_document_task from papermerge.search.schema import FOLDER, PAGE, ColoredTag, Model logger = logging.getLogger(__name__) +RETRY_KWARGS = { + 'max_retries': 7, # number of times to retry the task + 'countdown': 5 # Time in seconds to delay the retry for. 
+}
+
+
+def get_index():
+    try:
+        engine = create_engine(settings.SEARCH_URL)
+    except Exception as e:
+        # may happen when using xapian search backend and multiple
+        # workers try to get write access to the index
+        logger.warning(f"Exception '{e}' occurred while opening engine")
+        raise  # re-raise: `engine` would otherwise be unbound below
+    return IndexRW(engine, schema=Model)
+
+
 @task_success.connect(sender=ocr_document_task)
 def task_success_notifier(sender=None, **kwargs):
     """
@@ -27,7 +44,11 @@ def task_success_notifier(sender=None, **kwargs):
     index_add_node(kwargs['result'])
 
 
-@shared_task(name=INDEX_ADD_NODE)
+@shared_task(
+    name=constants.INDEX_ADD_NODE,
+    autoretry_for=(Exception,),
+    retry_kwargs=RETRY_KWARGS
+)
 def index_add_node(node_id: str):
     """Add node to the search index
 
@@ -36,47 +57,54 @@ def index_add_node(node_id: str):
     In other words, if folder was already indexed (added before), its record
     in index will be updated otherwise its record will be inserted.
     """
-    try:
-        # may happen when using xapian search backend and multiple
-        # workers try to get write access to the index
-        engine = create_engine(settings.SEARCH_URL)
-    except Exception as e:
-        logger.warning(f"Exception '{e}' occurred while opening engine")
-        logger.warning(f"Index add for {node_id} interrupted")
-        return
-
-    index = IndexRW(engine, schema=Model)
-
     node = BaseTreeNode.objects.get(pk=node_id)
     logger.debug(f'ADD node title={node.title} ID={node.id} to INDEX')
+    index = get_index()
 
     if node.is_document:
         models = from_document(node)
     else:
         models = [from_folder(node)]
 
+    logger.debug(f"Adding to index {models}")
     for model in models:
         index.add(model)
 
 
-@shared_task(name=INDEX_REMOVE_NODE)
+@shared_task(
+    name=constants.INDEX_ADD_DOCS,
+    autoretry_for=(Exception,),
+    retry_kwargs=RETRY_KWARGS
+)
+def index_add_docs(doc_ids: List[str]):
+    """Add list of documents to index"""
+    logger.debug(f"Add docs with {doc_ids} BEGIN")
+    docs = Document.objects.filter(pk__in=doc_ids)
+    index = get_index()
+
+    for doc in docs:
+        models = from_document(doc)
+        for model in models:
+            logger.debug(f"Adding {model} to index")
+            index.add(model)
+
+    logger.debug(f"Add docs with {doc_ids} END")
+
+
+@shared_task(
+    name=constants.INDEX_REMOVE_NODE,
+    autoretry_for=(Exception,),
+    retry_kwargs=RETRY_KWARGS
+)
 def remove_folder_or_page_from_index(item_ids: List[str]):
     """Removes folder or page from search index
     """
     logger.debug(f'Removing folder or page {item_ids} from index')
-    try:
-        logger.debug(f'Creating engine {settings.SEARCH_URL}')
-        engine = create_engine(settings.SEARCH_URL)
-    except Exception as e:
-        # may happen when using xapian search backend and multiple
-        # workers try to get write access to the index
-        logger.warning(f"Exception '{e}' occurred while opening engine")
-        logger.warning(f"Index remove for {item_ids} interrupted")
-        return
-
-    index = IndexRW(engine, schema=Model)
-
+    logger.debug(
+        f"Remove pages or folder from index len(item_ids)={len(item_ids)}"
+    )
+    index = get_index()
     for item_id in item_ids:
         try:
             logger.debug(f'index remove {item_id}')
@@ -88,36 +116,63 @@ def remove_folder_or_page_from_index(item_ids: List[str]):
     logger.debug('End of remove_folder_or_page_from_index')
 
 
-@shared_task(name=INDEX_ADD_PAGES)
+@shared_task(
+    name=constants.INDEX_ADD_PAGES,
+    autoretry_for=(Exception,),
+    retry_kwargs=RETRY_KWARGS
+)
 def add_pages_to_index(page_ids: List[str]):
-    try:
-        # may happen when using xapian search backend and multiple
-        # workers try to get write access to the index
-        engine = create_engine(settings.SEARCH_URL)
-    except Exception as e:
-        
logger.warning(f"Exception '{e}' occurred while opening engine") - logger.warning(f"Index add for {page_ids} interrupted") - return - - index = IndexRW(engine, schema=Model) index_entities = [from_page(page_id) for page_id in page_ids] - + logger.debug( + f"Add pages to index: {index_entities}" + ) + index = get_index() for model in index_entities: index.add(model) -@shared_task(name=INDEX_UPDATE) -def update_index(add_page_ids: List[str], remove_page_ids: List[str]): +@shared_task( + name=constants.INDEX_UPDATE, + autoretry_for=(Exception,), + retry_kwargs=RETRY_KWARGS +) +def update_index(add_ver_id: str, remove_ver_id: str): """Updates index - Removes `remove_page_ids` and adds `add_page_ids` from index in - one "transaction". + Removes pages of `remove_ver_id` document version and adds + pages of `add_ver_id` from/to index in one "transaction". """ logger.debug( - f"Index Update: add={add_page_ids}, remove={remove_page_ids}" + f"Index Update: add={add_ver_id}, remove={remove_ver_id}" ) - remove_folder_or_page_from_index(remove_page_ids) - add_pages_to_index(add_page_ids) + add_ver = None + remove_ver = None + try: + add_ver = DocumentVersion.objects.get(pk=add_ver_id) + except DocumentVersion.DoesNotExist: + logger.debug( + f"Index add doc version {add_ver_id} not found." + ) + try: + remove_ver = DocumentVersion.objects.get(pk=remove_ver_id) + except DocumentVersion.DoesNotExist: + logger.warning(f"Index remove doc version {remove_ver_id} not found") + + if add_ver: # doc ver is there, but does it have pages? + add_page_ids = [str(page.id) for page in add_ver.pages.all()] + if len(add_page_ids) > 0: + # doc ver is there and it has pages + add_pages_to_index(add_page_ids) + else: + logger.debug("Empty page ids. Nothing to add to index") + + if remove_ver: # doc ver is there, but does it have pages? + remove_page_ids = [str(page.id) for page in remove_ver.pages.all()] + if len(remove_page_ids) > 0: + # doc ver is there and it has pages + remove_folder_or_page_from_index(remove_page_ids) + else: + logger.debug("Empty page ids. Nothing to remove from index") def from_page(page_id: str) -> Model: @@ -182,9 +237,13 @@ def from_folder(node: BaseTreeNode) -> Model: return index_entity -def from_document(node: BaseTreeNode) -> List[Model]: +def from_document(node: BaseTreeNode | Document) -> List[Model]: result = [] - doc = node.document + if isinstance(node, BaseTreeNode): + doc = node.document + else: + doc = node # i.e. node is instance of Document + last_ver: DocumentVersion = doc.versions.last() for page in last_ver.pages.all(): diff --git a/tests/core/test_page_ops.py b/tests/core/test_page_ops.py index 012fc5531..de5a7cb27 100644 --- a/tests/core/test_page_ops.py +++ b/tests/core/test_page_ops.py @@ -5,7 +5,8 @@ from papermerge.core.models import Document from papermerge.core.page_ops import (apply_pages_op, copy_text_field, - extract_pages, move_pages) + copy_without_pages, extract_pages, + move_pages) from papermerge.core.pathlib import abs_page_path from papermerge.core.schemas.pages import ExtractStrategy, MoveStrategy from papermerge.core.schemas.pages import Page as PyPage @@ -602,8 +603,14 @@ def test_extract_two_pages_to_folder_each_page_in_separate_doc(_): if page.number <= 2 ]) - [result_old_doc, result_new_docs] = extract_pages( - # we are moving out all pages of the source doc version + # add some text to the source version pages + for p in src_ver.pages.all(): + p.text = f"I am page number {p.number}!" 
+ p.save() + + # page extraction / function under test (FUD) + [result_old_doc, result_new_docs] = extract_pages( # FUD + # we are moving out first two pages of the source doc version source_page_ids=[ page.id for page in src_ver.pages.all() if page.number <= 2 ], @@ -627,11 +634,52 @@ def test_extract_two_pages_to_folder_each_page_in_separate_doc(_): ) == PageDir( saved_src_pages_ids[0], number=1, name="src old" ) + # destination page must be same as first source page + assert dst_pages1[0].text == src_ver.pages.all()[0].text + assert PageDir( dst_pages2[0].id, number=1, name="dst2 newly create doc" ) == PageDir( saved_src_pages_ids[1], number=2, name="src old" ) + # destination page must be same as second source page + assert dst_pages2[0].text == src_ver.pages.all()[1].text + + +@patch('papermerge.core.signals.ocr_document_task') +def test_copy_without_pages(_): + """Scenario + + copy without page 1 + ver X -> ver X + 1 + S1 S2 + S2 + """ + user = user_recipe.make() + # 1. create a doc with two pages + # first page has word "cat" + # second page has word "dog" + src = maker.document( + resource='living-things.pdf', + user=user, + include_ocr_data=True + ) + orig_doc_ver = src.versions.last() + orig_first_page = orig_doc_ver.pages.all()[0] + orig_second_page = orig_doc_ver.pages.all()[1] + orig_first_page.text = "cat" + orig_second_page.text = "dog" + orig_first_page.save() + orig_second_page.save() + # page containing "cat" / first page is left behind + pages_to_leave_behind = [orig_doc_ver.pages.first().id] + + [_, new_ver, _] = copy_without_pages(pages_to_leave_behind) + + assert new_ver.pages.count() == 1 + # new version contains only 'dog' + assert ['dog'] == [p.text for p in new_ver.pages.all()] + assert new_ver.text == 'dog' class PageDir: diff --git a/tests/core/views/test_groups.py b/tests/core/views/test_groups.py deleted file mode 100644 index 86020fd76..000000000 --- a/tests/core/views/test_groups.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest -from django.urls import reverse -from model_bakery import baker - -from papermerge.test import TestCase, perms - - -@pytest.mark.skip() -class GroupsViewPermissionsTestCase(TestCase): - - def test_view_groups_forbidden_for_default_user(self): - """ - User without any permissions does not have access - to 'group-list' - """ - baker.make('group') - response = self.client.get(reverse('group-list')) - assert response.status_code == 403 - - @perms('view_group') # assign to currently logged in user 'view_group' perm - def test_view_groups_allowed_for_user_with_view_perm(self): - """ - Access to 'group-list' is granted if user has 'view_group' permission - """ - baker.make('group') - response = self.client.get(reverse('group-list')) - assert response.status_code == 200 diff --git a/tests/core/views/test_pages.py b/tests/core/views/test_pages.py deleted file mode 100644 index 4a487ff8a..000000000 --- a/tests/core/views/test_pages.py +++ /dev/null @@ -1,1079 +0,0 @@ -import io -import json -import os -import shutil -from pathlib import Path -from unittest.mock import patch - -import pikepdf -import pytest -from django.test import TestCase -from django.urls import reverse -from rest_framework.test import APIClient - -from papermerge.core.models import Document, Folder, User -from papermerge.core.storage import abs_path - -MODELS_DIR_ABS_PATH = os.path.abspath(os.path.dirname(__file__)) -TEST_DIR_ABS_PATH = os.path.dirname( - os.path.dirname(MODELS_DIR_ABS_PATH) -) - - -@pytest.mark.skip() -class PageViewTestCase(TestCase): - - def setUp(self): 
- self.user = User.objects.create_user(username="user1") - self.doc = Document.objects.create_document( - title="invoice.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self.doc_version = self.doc.versions.last() - self.client = APIClient() - self.client.force_authenticate(user=self.user) - self.resources = Path(TEST_DIR_ABS_PATH) / 'resources' - self.media = Path(TEST_DIR_ABS_PATH) / 'media' - shutil.rmtree(self.media / 'docs', ignore_errors=True) - shutil.rmtree(self.media / 'sidecars', ignore_errors=True) - - def test_page_view_in_json_format(self): - """ - GET /pages/{id}/ - Accept: application/vnd.api+json - """ - self.doc_version.create_pages(page_count=1) - page = self.doc_version.pages.first() - - page.update_text_field(io.StringIO('Hello Page!')) - response = self.client.get( - reverse('pages_page', args=(page.pk,)), - HTTP_ACCEPT='application/vnd.api+json' - ) - - assert response.status_code == 200 - - json_data = json.loads(response.content) - assert json_data['data']['id'] == str(page.pk) - all_attrs = json_data['data']['attributes'] - - assert set(all_attrs.keys()) == {'lang', 'number', 'text', 'svg_url', - 'jpg_url'} - - main_attrs = { - key: all_attrs[key] for key in all_attrs if key in ( - 'lang', 'number', 'text' - ) - } - assert main_attrs == { - 'lang': 'deu', - 'number': 1, - 'text': 'Hello Page!' - } - - @patch('papermerge.core.models.document_version.convert_from_path') - def test_page_view_in_svg_format(self, _): - """ - GET /pages/{id}/ - Accept: image/svg+xml - """ - self.doc_version.create_pages(page_count=1) - page = self.doc_version.pages.first() - - page.update_text_field(io.StringIO('Hello Page!')) - response = self.client.get( - reverse('pages_page', args=(page.pk,)), - HTTP_ACCEPT='image/svg+xml' - ) - - # SVG image is not yet available, but - # at least status code is not 500 - assert response.status_code == 404 - - @patch('papermerge.core.signals.ocr_document_task') - def test_page_view_in_jpg_format(self, _): - """ - GET /pages/{id}/ - Accept: image/jpeg - """ - payload = open(self.resources / 'three-pages.pdf', 'rb') - doc = self.doc_version.document - doc.upload( - payload=payload, - file_path=self.resources / 'three-pages.pdf', - file_name='three-pages.pdf' - ) - page = self.doc_version.pages.first() - - page.update_text_field(io.StringIO('Hello Page!')) - response = self.client.get( - reverse('pages_page', args=(page.pk,)), - HTTP_ACCEPT='image/jpeg' - ) - - assert response.status_code == 200 - - @patch('papermerge.core.signals.ocr_document_task') - def test_page_view_in_text_format(self, _): - """ - GET /pages/{id}/ - Accept: text/plain - """ - self.doc_version.create_pages(page_count=1) - page = self.doc_version.pages.first() - - page.update_text_field(io.StringIO('Hello Page!')) - response = self.client.get( - reverse('pages_page', args=(page.pk,)), - HTTP_ACCEPT='text/plain' - ) - - assert response.status_code == 200 - assert response.content.decode('utf-8') == 'Hello Page!' 
- - @patch('papermerge.core.signals.ocr_document_task') - def test_page_delete(self, _x): - """ - DELETE /pages/{id}/ - """ - doc = self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - third_page = pages.all()[2] - - for page in pages: - page.update_text_field(io.StringIO(f'Hello Page {page.number}!')) - - # at this point document has only one version - assert doc.versions.count() == 1 - # last version has 3 pages - last_version = doc.versions.last() - assert last_version.pages.count() == 3 - pdf_file = pikepdf.Pdf.open(abs_path(last_version.document_path)) - assert len(pdf_file.pages) == 3 - - # delete last (i.e. 3rd) page - response = self.client.delete( - reverse('pages_page', args=(third_page.pk,)), - ) - assert response.status_code == 204 - - # at this point document has two versions - assert doc.versions.count() == 2 - # last version has 2 pages - last_version = doc.versions.last() - assert last_version.pages.count() == 2 - pdf_file = pikepdf.Pdf.open(abs_path(last_version.document_path)) - assert len(pdf_file.pages) == 2 - pdf_file.close() - - @patch('papermerge.core.signals.ocr_document_task') - def test_page_delete_preserves_text_fields(self, _): - """ - After deleting a page a new document will be created. - The pages of new version will reuse text field from document's - previous version. In this test we consider a document with two pages - - page one contains text 'fish' - - page two conains text 'cat' - We delete first page ('fish' page). Newly created document - version will have one page with text 'cat' in it. - """ - doc = self._upload(self.doc, 'living-things.pdf') - pages = self.doc_version.pages.all() - - for page, text in zip(pages, ['fish', 'cat']): - page.update_text_field(io.StringIO(text)) - - fish_page = pages[0] - assert fish_page.text == 'fish' - - response = self.client.delete( - reverse('pages_page', args=(fish_page.pk,)), - ) - assert response.status_code == 204 - last_version = doc.versions.last() - assert last_version.pages.count() == 1 - - cat_page = last_version.pages.all()[0] - # assert that text field is reused across document versions - assert cat_page.text == 'cat' - # document's version text field was updated as well - assert last_version.text == 'cat' - - @patch('papermerge.core.signals.ocr_document_task') - def test_page_delete_archived_page(self, _): - """ - Assert that deleting an archived page is not allowed. 
- """ - doc = self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - third_page = pages.all()[2] - - # Once document version is bump, all pages referenced - # by `pages` variable become archived - doc.version_bump() - - # try to delete archived page - response = self.client.delete( - reverse('pages_page', args=(third_page.pk,)), - ) - assert response.status_code == 400 - err_msg = response.data[0]['detail'] - assert err_msg == 'Deleting archived page is not allowed' - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_delete(self, _): - """ - DELETE /pages/ - Content-Type: application/json - { - "pages": [1, 2, 3] - } - """ - doc = self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - page_ids = [page.pk for page in pages] - - for page in pages: - page.update_text_field(io.StringIO(f'Hello Page {page.number}!')) - - # at this point document has only one version - assert doc.versions.count() == 1 - # last version has 3 pages - last_version = doc.versions.last() - assert last_version.pages.count() == 3 - - response = self.client.delete( - reverse('pages'), - data={ - "pages": page_ids[-2:] # delete last two pages - }, - format='json' - ) - assert response.status_code == 204 - - # at this point document has two versions - assert doc.versions.count() == 2 - # last version has only one page left - last_version = doc.versions.last() - assert last_version.pages.count() == 1 - pdf_file = pikepdf.Pdf.open(abs_path(last_version.document_path)) - assert len(pdf_file.pages) == 1 - pdf_file.close() - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_delete_preserves_text_fields(self, _): - """ - After deleting two pages new document will be created. - The pages of new version will reuse text field from document's - previous version. In this test we consider a document with three pages - - page one contains text 'page 1' - - page two contains text 'page 2' - - page two contains text 'page 3' - We delete first page two pages. Newly created document - version will have one page with text 'page 3' in it. - """ - doc = self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - - for page, text in zip(pages, ['page 1', 'page 2', 'page 3']): - page.update_text_field(io.StringIO(text)) - - page_1 = pages[0] - page_2 = pages[1] - assert page_1.text == 'page 1' - assert page_2.text == 'page 2' - - data = { - 'pages': [page_1.pk, page_2.pk] - } - - # delete first two pages - response = self.client.delete(reverse('pages'), data, format='json') - - assert response.status_code == 204 - last_version = doc.versions.last() - assert last_version.pages.count() == 1 - - last_page = last_version.pages.all()[0] - # assert that text field is reused across document versions - assert last_page.text == 'page 3' - # document's version text field was updated as well - assert last_version.text == 'page 3' - - @patch('papermerge.core.signals.ocr_document_task') - def test_document_ver_must_have_at_least_one_page_delete_one_by_one( - self, - _x - ): - """ - Document version must have at least one page. - - In this scenario document version has 3 pages. - Deleting first two pages one by one should be OK. - However, after first two steps, document version will have only - one page left; in such case deleting that last page should - result in error. - """ - self._upload(self.doc, 'three-pages.pdf') - # Delete pages one by one. 
- # Deleting first page should be OK - page_id = self.doc.versions.last().pages.last().pk - response = self.client.delete( - reverse('pages'), - data={ - "pages": [page_id] - }, - format='json' - ) - assert response.status_code == 204 - assert response.data == { - 'pages': [ - str(page_id) - ] - } - # Deleting next page should be OK as well - page_id = self.doc.versions.last().pages.last().pk - response = self.client.delete( - reverse('pages'), - data={ - "pages": [page_id] - }, - format='json' - ) - assert response.status_code == 204 - assert response.data == { - 'pages': [ - str(page_id) - ] - } - # Deleting last page should result in error - page_id = self.doc.versions.last().pages.last().pk - response = self.client.delete( - reverse('pages'), - data={ - "pages": [page_id] - }, - format='json' - ) - assert response.status_code == 400 - err_msg = response.data[0]['detail'] - assert err_msg == 'Document version must have at least one page' - - last_version = self.doc.versions.last() - assert last_version.pages.count() == 1 - pdf_file = pikepdf.Pdf.open(abs_path(last_version.document_path)) - assert len(pdf_file.pages) == 1 - pdf_file.close() - - @patch('papermerge.core.signals.ocr_document_task') - def test_document_ver_must_have_at_least_one_page_delete_bulk(self, _): - """ - Document version must have at least one page. - - In this scenario document version has 3 pages. - Deleting all three pages should result in error because otherwise - it will heave document version with 0 pages. - """ - self._upload(self.doc, 'three-pages.pdf') - page_ids = [page.pk for page in self.doc.versions.last().pages.all()] - response = self.client.delete( - reverse('pages'), - # trying to delete ALL pages in document version - data={"pages": page_ids}, - format='json' - ) - assert response.status_code == 400 - err_msg = response.data[0]['detail'] - assert err_msg == 'Document version must have at least one page' - - # no page was deleted - last_version = self.doc.versions.last() - assert last_version.pages.count() == 3 - pdf_file = pikepdf.Pdf.open(abs_path(last_version.document_path)) - assert len(pdf_file.pages) == 3 - pdf_file.close() - - @patch('papermerge.core.signals.ocr_document_task') - def test_delete_pages_from_archived_version(self, _x): - """ - Archived document version is any document version which is not last. - Only last document version is editable - in the context of - this scenario, only pages of very last document version - can be deleted. - - In this scenario page deletion performed via `pages` endpoint. - """ - self._upload(self.doc, 'three-pages.pdf') - # all pages are from same document version - # which at this moment is last document version - page_ids = [page.pk for page in self.doc.versions.last().pages.all()] - # Deleting - response = self.client.delete( - reverse('pages'), - data={ - "pages": [page_ids[0]] - }, - format='json' - ) - assert response.status_code == 204 - # At this point page_ids are not part of - # document last document version (because previous - # page deletion incremented document version by one). 
- # If we try to delete page_ids[1] now, it must result - # in error because we are trying to edit an archived document version - response = self.client.delete( - reverse('pages'), - data={ - "pages": [page_ids[1]] - }, - format='json' - ) - assert response.status_code == 400 - err_msg = response.data[0]['detail'] - assert err_msg == 'Deleting archived page is not allowed' - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_reorder(self, _): - self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - pages_data = [ - { - 'id': pages[0].id, - 'old_number': pages[0].number, # = 1 - 'new_number': 3 - }, { - 'id': pages[1].id, - 'old_number': pages[1].number, # = 2 - 'new_number': 2 - }, { - 'id': pages[2].id, - 'old_number': pages[2].number, # = 3 - 'new_number': 1 - }, - ] - - response = self.client.post( - reverse('pages_reorder'), - data={ - "pages": pages_data # reorder pages - }, - format='json' - ) - - assert response.status_code == 204 - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_reorder_preserves_text_fields(self, _): - """ - Test that after changing order of page in the document, - """ - self._upload(self.doc, 'living-things.pdf') - pages = self.doc.versions.last().pages.all() - - for page, text in zip(pages, ['fish', 'cat']): - page.update_text_field(io.StringIO(text)) - - assert pages[0].text == 'fish' - assert pages[0].number == 1 - assert pages[1].text == 'cat' - assert pages[1].number == 2 - - pages_data = [ - { - 'id': pages[0].id, - 'old_number': pages[0].number, # = 1 - 'new_number': 2 - }, { - 'id': pages[1].id, - 'old_number': pages[1].number, # = 2 - 'new_number': 1 - } - ] - - response = self.client.post( - reverse('pages_reorder'), - data={ - "pages": pages_data # reorder pages - }, - format='json' - ) - - assert response.status_code == 204 - - assert self.doc.versions.count() == 2 - last_version = self.doc.versions.last() - pages = last_version.pages.all() - assert pages[0].text == 'cat' - assert pages[0].number == 1 - assert pages[1].text == 'fish' - assert pages[1].number == 2 - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_reorder_reuses_ocr_data(self, _): - """ - Asserts that page reorder reuses correctly OCR data. 
- - Only txt data file is checked here - """ - self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - # because the real OCRing is not triggered (too slow) we - # create our own versions of txt data - # Currently: - # page 1 contains text 'I am page 3' - # page 2 contains text 'I am page 2' - # page 3 contains text 'I am page 1' - current_order = [3, 1, 2] - for index in range(0, 3): - os.makedirs( - os.path.dirname(abs_path(pages[index].txt_url)), - exist_ok=True - ) - with open(abs_path(pages[index].txt_url), 'w+') as f: - f.write(f'I am page {current_order[index]}') - - pages_data = [ - { - 'id': pages[0].id, - 'old_number': pages[0].number, # = 1 - 'new_number': 3 - }, { - 'id': pages[1].id, - 'old_number': pages[1].number, # = 2 - 'new_number': 1 - }, { - 'id': pages[2].id, - 'old_number': pages[2].number, # = 3 - 'new_number': 2 - }, - ] - - response = self.client.post( - reverse('pages_reorder'), - data={ - "pages": pages_data # reorder pages - }, - format='json' - ) - - assert response.status_code == 204 - assert self.doc.versions.count() == 2 - - new_pages = self.doc.versions.last().pages.all() - # page 1 should contain text 'I am page 1' - # page 2 should contain text 'I am page 2' - # page 3 should contain text 'I am page 3' - for index in range(0, 3): - assert new_pages[index].number == index + 1 - with open(abs_path(new_pages[index].txt_url)) as f: - text = f.read() - assert text == f'I am page {index + 1}' - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_rotate(self, _): - self._upload(self.doc, 'three-pages.pdf') - pages = self.doc_version.pages.all() - pages_data = [ - { - 'id': pages[0].id, - 'angle': 90 - } - ] - - response = self.client.post( - reverse('pages_rotate'), - data={ - "pages": pages_data # rotate pages - }, - format='json' - ) - - assert response.status_code == 204 - - @patch('papermerge.core.signals.ocr_document_task') - def test_pages_rotate_preserves_text_field(self, _): - self._upload(self.doc, 'living-things.pdf') - pages = self.doc_version.pages.all() - - for page, text in zip(pages, ['fish', 'cat']): - page.update_text_field(io.StringIO(text)) - - fish_page = pages[0] - assert fish_page.text == 'fish' - - pages_data = [ - { - 'id': pages[0].id, - 'angle': 90 - } - ] - - response = self.client.post( - reverse('pages_rotate'), - data={ - "pages": pages_data # rotate pages - }, - format='json' - ) - - assert response.status_code == 204 - - last_version = self.doc.versions.last() - assert last_version.pages.count() == 2 - - fish_page = last_version.pages.all()[0] - # assert that text field is reused across document versions - assert fish_page.text == 'fish' - - cat_page = last_version.pages.all()[1] - # assert that text field is reused across document versions - assert cat_page.text == 'cat' - - # document's version text field was updated as well - assert last_version.text == 'fish cat' - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_document_1(self, _): - """ - Move two pages from source document to destination document. - - Initially both source and destination document have - one document_version with three pages each. - If page move (two pages from source moved to destination) - is completed successfully, destination document's latest version will - have five pages and source document's latest version will have one - page. 
- """ - source = Document.objects.create_document( - title="source.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - destination = Document.objects.create_document( - title="destination.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'three-pages.pdf') - self._upload(destination, 'three-pages.pdf') - - source_page_ids = [ - page.id for page in source.versions.last().pages.all()[0:2] - ] - - pages_data = { - 'pages': source_page_ids, - 'dst': destination.id, - 'position': 0 - } - response = self.client.post( - reverse('pages_move_to_document'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - # source document has one extra version - assert source.versions.count() == 2 - src_doc_version = source.versions.last() - assert src_doc_version.pages.count() == 1 - pdf_file = pikepdf.Pdf.open(abs_path(src_doc_version.document_path)) - # payload of source's last version has now one page - assert len(pdf_file.pages) == 1 - - # destination document has one extra version - assert destination.versions.count() == 2 - dst_doc_version = destination.versions.last() - assert dst_doc_version.pages.count() == 5 - # payload of destination's last version has now 5 pages - pdf_file = pikepdf.Pdf.open(abs_path(dst_doc_version.document_path)) - assert len(pdf_file.pages) == 5 - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_document_preserves_text_field(self, _): - source = Document.objects.create_document( - title="source.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - destination = Document.objects.create_document( - title="destination.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'three-pages.pdf') - self._update_text_field(source, ['cat', 'dog', 'monkey']) - self._upload(destination, 'three-pages.pdf') - self._update_text_field(destination, ['flower', 'tree', 'plant']) - - source_page_ids = [ - page.id for page in source.versions.last().pages.all()[0:2] - ] - # move first two pages from source to destination - pages_data = { - 'pages': source_page_ids, - 'dst': destination.id, - 'position': 0 - } - response = self.client.post( - reverse('pages_move_to_document'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - source_pages = source.versions.last().pages.all() - destination_pages = destination.versions.last().pages.all() - # Initially both source and destination had three pages. - # After moving two pages from one source to destination - # source will have only one page and destination five. - assert source_pages.count() == 1 - assert destination_pages.count() == 5 - assert source_pages[0].text == 'monkey' - - assert destination_pages[0].text == 'cat' - assert destination_pages[1].text == 'dog' - assert destination_pages[2].text == 'flower' - assert destination_pages[3].text == 'tree' - assert destination_pages[4].text == 'plant' - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_document_reuses_ocr_data_copy_to_position_1(self, _): - """ - Given two documents doc_a and doc_b, when moving - page two pages from doc_a to doc_b, OCR data is moved correctly. - - Both documents have three pages. - This test copies two pages from document doc_a.pdf to doc_b.pdf - to position 1. 
- """ - doc_a, doc_b, pages_a, pages_b = self._setup_pages_move_to_document() - - response = self.client.post( - reverse('pages_move_to_document'), - data={ - "pages": [pages_a[1].pk, pages_a[2].pk], - "dst": doc_b.pk, - "position": 1 - }, - format='json' - ) - - assert response.status_code == 204 - - assert doc_a.versions.count() == 2 - assert doc_b.versions.count() == 2 - - new_pages_b = doc_b.versions.last().pages.all() - - new_pages_text = [ - 'I am page doc_b_1', - 'I am page doc_a_2', - 'I am page doc_a_3', - 'I am page doc_b_2', - 'I am page doc_b_3' - ] - - for index in range(0, 5): - with open(abs_path(new_pages_b[index].txt_url)) as f: - text = f.read() - assert text == new_pages_text[index] - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_document_reuses_ocr_data_copy_to_position_0(self, _): - """ - Given two documents doc_a and doc_b, when moving - page two pages from doc_a to doc_b, OCR data is moved correctly. - - Both documents have three pages. - This test copies two pages from document doc_a.pdf to doc_b.pdf - to position 0. - """ - doc_a, doc_b, pages_a, pages_b = self._setup_pages_move_to_document() - - response = self.client.post( - reverse('pages_move_to_document'), - data={ - "pages": [pages_a[1].pk, pages_a[2].pk], - "dst": doc_b.pk, - "position": 0 # copy pages to the beginning of the target doc - }, - format='json' - ) - - assert response.status_code == 204 - - assert doc_a.versions.count() == 2 - assert doc_b.versions.count() == 2 - - new_pages_b = doc_b.versions.last().pages.all() - - # newly copied pages are the beginning of the target document - new_pages_text = [ - 'I am page doc_a_2', - 'I am page doc_a_3', - 'I am page doc_b_1', - 'I am page doc_b_2', - 'I am page doc_b_3' - ] - - for index in range(0, 5): - with open(abs_path(new_pages_b[index].txt_url)) as f: - text = f.read() - assert text == new_pages_text[index] - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_folder_with_single_page_flag_on(self, _): - """ - Move two pages from source document to destination folder - with single page flag 'on'. - - Initially both source and destination document have - one document_version with three pages each. - If page move (two pages from source moved to destination) - is completed successfully, in destination folder's - will contains two new documents with one page each. 
- """ - - source = Document.objects.create_document( - title="source.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'three-pages.pdf') - destination_folder = Folder.objects.create( - title="Destination Folder", - user_id=self.user.pk, - parent=self.user.home_folder - ) - source_page_ids = [ - page.id for page in source.versions.last().pages.all()[0:2] - ] - - pages_data = { - 'pages': source_page_ids, - 'dst': destination_folder.id, - 'single_page': True - } - response = self.client.post( - reverse('pages_move_to_folder'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - assert source.versions.count() == 2 - src_doc_version = source.versions.last() - # new version of the source document will have two - # pages less (two pages were extracted) - assert src_doc_version.pages.count() == 1 - pdf_file = pikepdf.Pdf.open(abs_path(src_doc_version.document_path)) - # payload of source's last version has now one page - assert len(pdf_file.pages) == 1 - - assert destination_folder.children.count() == 2 - - for child in destination_folder.children.all(): - last_ver = child.document.versions.last() - pdf_file = pikepdf.Pdf.open(abs_path(last_ver.document_path)) - # (last version of) newly created document has only one pages - assert len(pdf_file.pages) == 1 - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_folder_single_paged_preserves_text_field(self, _): - source = Document.objects.create_document( - title="living-things.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'living-things.pdf') - source_pages = self._update_text_field(source, ['fish', 'cat']) - destination_folder = Folder.objects.create( - title="Destination Folder", - user_id=self.user.pk, - parent=self.user.home_folder - ) - - pages_data = { - 'pages': [source_pages[1].pk], - 'dst': destination_folder.id, - 'single_page': True - } - response = self.client.post( - reverse('pages_move_to_folder'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - source_last_version = source.versions.last() - source_pages = source_last_version.pages.all() - assert source_pages[0].text == 'fish' - - # newly created one page document - destination_doc = destination_folder.children.last() # and only - destination_pages = destination_doc.document.versions.last().pages.all() - assert destination_pages[0].text == 'cat' - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_folder_with_multi_page(self, _): - """ - Move two pages from source document to destination folder - with single page flag 'off'. - - Initially both source and destination document have - one document_version with three pages each. - If page move (two pages from source moved to destination) - is completed successfully, in destination folder's - will contains one new document with two pages. 
- """ - source = Document.objects.create_document( - title="source.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'three-pages.pdf') - destination_folder = Folder.objects.create( - title="Destination Folder", - user_id=self.user.pk, - parent=self.user.home_folder - ) - source_page_ids = [ - page.id for page in source.versions.last().pages.all()[0:2] - ] - - pages_data = { - 'pages': source_page_ids, - 'dst': destination_folder.id, - 'single_page': False - } - response = self.client.post( - reverse('pages_move_to_folder'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - assert source.versions.count() == 2 - src_doc_version = source.versions.last() - # new version of the source document will have two - # pages less (two pages were extracted) - assert src_doc_version.pages.count() == 1 - pdf_file = pikepdf.Pdf.open(abs_path(src_doc_version.document_path)) - # payload of source's last version has now one page - assert len(pdf_file.pages) == 1 - - assert destination_folder.children.count() == 1 - - newly_created_document = destination_folder.children.first() - last_ver = newly_created_document.document.versions.last() - pdf_file = pikepdf.Pdf.open(abs_path(last_ver.document_path)) - # (last version of) newly created document has two pages - assert len(pdf_file.pages) == 2 - - @patch('papermerge.core.signals.ocr_document_task') - def test_move_to_folder_multi_paged_preserves_text_field(self, _): - source = Document.objects.create_document( - title="three-pages.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(source, 'three-pages.pdf') - source_pages = self._update_text_field(source, ['fish', 'cat', 'doc']) - destination_folder = Folder.objects.create( - title="Destination Folder", - user_id=self.user.pk, - parent=self.user.home_folder - ) - - pages_data = { - 'pages': [source_pages[1].pk, source_pages[2].pk], - 'dst': destination_folder.id, - 'single_page': False - } - response = self.client.post( - reverse('pages_move_to_folder'), - data=pages_data, - format='json' - ) - - assert response.status_code == 204 - - source_last_version = source.versions.last() - source_pages = source_last_version.pages.all() - assert source_pages[0].text == 'fish' - - # newly created one page document - destination_doc = destination_folder.children.last() # and only - destination_pages = destination_doc.document.versions.last().pages.all() - - assert destination_pages[0].text == 'cat' - assert destination_pages[1].text == 'doc' - - def _upload(self, doc, file_name): - payload = open(self.resources / file_name, 'rb') - doc.upload( - payload=payload, - file_path=self.resources / file_name, - file_name=file_name - ) - payload.close() - return doc - - def _update_text_field(self, doc, list_of_page_strings): - pages = doc.versions.last().pages.all() - for page, text in zip(pages, list_of_page_strings): - page.update_text_field(io.StringIO(text)) - - return pages - - def _setup_pages_move_to_document(self): - doc_a = Document.objects.create_document( - title="doc_a.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - doc_b = Document.objects.create_document( - title="doc_A.pdf", - lang="deu", - user_id=self.user.pk, - parent=self.user.home_folder - ) - self._upload(doc_a, 'three-pages.pdf') - self._upload(doc_b, 'three-pages.pdf') - pages_a = doc_a.versions.last().pages.all() - pages_b = doc_b.versions.last().pages.all() - - for index in range(0, 3): - 
os.makedirs( - os.path.dirname(abs_path(pages_a[index].txt_url)), - exist_ok=True - ) - with open(abs_path(pages_a[index].txt_url), 'w+') as f: - f.write(f'I am page doc_a_{index + 1}') - - for index in range(0, 3): - os.makedirs( - os.path.dirname(abs_path(pages_b[index].txt_url)), - exist_ok=True - ) - with open(abs_path(pages_b[index].txt_url), 'w+') as f: - f.write(f'I am page doc_b_{index + 1}') - - return doc_a, doc_b, pages_a, pages_b diff --git a/tests/core/views/test_permissions.py b/tests/core/views/test_permissions.py deleted file mode 100644 index ee7d82f4f..000000000 --- a/tests/core/views/test_permissions.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest -from django.urls import reverse - -from papermerge.test import TestCase, perms - - -@pytest.mark.skip() -class PermissionsViewPermissionsTestCase(TestCase): - """ - Assert that only user with 'view_permission' permission - can list (almost) ALL permissions available in the system. - In other words: - - GET /api/permissions/ - - access is granted only to the users with 'view_permission' perm. - """ - - def test_view_all_system_permissions_forbidden_for_default_user(self): - """ - User without 'view_permission' permission, user cannot list - all permissions available in the system i.e. - consume 'permission-list' - """ - response = self.client.get(reverse('permission-list')) - assert response.status_code == 403 - - @perms('view_permission') - def test_view_roles_allowed_for_user_with_view_perm(self): - """ - Access to 'permission-list' is granted if user has - view_permission' permission - """ - response = self.client.get(reverse('permission-list')) - assert response.status_code == 200 diff --git a/tests/core/views/test_preferences.py b/tests/core/views/test_preferences.py deleted file mode 100644 index c2997b430..000000000 --- a/tests/core/views/test_preferences.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest -from django.urls import reverse - -from papermerge.test import TestCase, perms - - -@pytest.mark.skip() -class PreferencesViewPermissionsTestCase(TestCase): - - def test_preferences_view_forbidden_for_default_user(self): - response = self.client.get(reverse('preferences-list')) - assert response.status_code == 403 - - @perms('view_userpreferencemodel') - def test_prefs_view_allowed_for_user_with_view_perm(self): - response = self.client.get(reverse('preferences-list')) - assert response.status_code == 200
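
Note on the task wiring above: page operations in papermerge.core.page_ops now
notify the search index purely by Celery task name, so the core app never
imports papermerge.search. A minimal sketch of that producer/consumer contract,
using the task names from papermerge.core.constants (the app instance and the
task body here are illustrative, not taken from this patch):

    from celery import Celery, shared_task

    INDEX_UPDATE = 'index_update'      # papermerge.core.constants.INDEX_UPDATE
    INDEX_ADD_DOCS = 'index_add_docs'  # constant added by this patch

    app = Celery('papermerge')  # hypothetical app instance

    # producer side (core): enqueue by name only, no import of search code
    def notify_index_update(add_ver_id: str, remove_ver_id: str):
        app.send_task(INDEX_UPDATE, (add_ver_id, remove_ver_id))

    # consumer side (search): a worker binds the same name to a task
    @shared_task(name=INDEX_UPDATE)
    def update_index(add_ver_id: str, remove_ver_id: str):
        ...  # resolve both document versions, swap their pages in the index

Because only strings cross the broker, either side can be deployed without the
other being importable.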
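The RETRY_KWARGS policy attached to every index task uses Celery's declarative
autoretry. Assuming stock Celery semantics, the decorated form in the patch
behaves roughly like this explicit equivalent (sketch only, body elided):

    from celery import shared_task

    @shared_task(bind=True)
    def index_add_node(self, node_id: str):
        try:
            ...  # open the index via get_index() and add the node
        except Exception as exc:
            # autoretry_for=(Exception,) plus RETRY_KWARGS is shorthand for:
            # retry up to 7 times, waiting 5 seconds between attempts
            raise self.retry(exc=exc, max_retries=7, countdown=5)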
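The "multiple workers try to get write access" comments refer to the xapian
backend, which allows only one writer per index at a time. Assuming the stock
xapian Python bindings (not part of this patch; the path is hypothetical), the
failure mode the retries absorb looks like this:

    import xapian

    path = '/tmp/search_index'  # hypothetical index location
    writer = xapian.WritableDatabase(path, xapian.DB_CREATE_OR_OPEN)
    try:
        # a second concurrent writer cannot acquire the lock
        other = xapian.WritableDatabase(path, xapian.DB_CREATE_OR_OPEN)
    except xapian.DatabaseLockError as exc:
        print(f'will retry in 5s: {exc}')

With up to 7 retries spaced 5 seconds apart, transient lock contention between
workers resolves without dropping the index update.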