From 5a2c7ca3e377448386030178424819cf0cfb89f5 Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Sun, 12 Mar 2023 16:11:26 -0700
Subject: [PATCH] Formatting

---
 .../annotation_types/ner/document_entity.py   |  1 +
 labelbox/data/serialization/ndjson/objects.py |  4 +-
 tests/data/annotation_types/test_ner.py       | 11 +++-
 .../integration/annotation_import/conftest.py | 14 ++++-
 .../test_bulk_import_request.py               | 52 +++++++++----------
 tests/integration/conftest.py                 | 10 ++--
 6 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/labelbox/data/annotation_types/ner/document_entity.py b/labelbox/data/annotation_types/ner/document_entity.py
index cdad06b6e6..921e3560d0 100644
--- a/labelbox/data/annotation_types/ner/document_entity.py
+++ b/labelbox/data/annotation_types/ner/document_entity.py
@@ -14,6 +14,7 @@ def validate_page(cls, v):
             raise ValueError("Page must be greater than 1")
         return v
 
+
 class DocumentEntity(BaseModel):
     """ Represents a text entity """
     name: str
diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py
index 717d8580da..001b7f49ed 100644
--- a/labelbox/data/serialization/ndjson/objects.py
+++ b/labelbox/data/serialization/ndjson/objects.py
@@ -372,19 +372,17 @@ def from_common(cls,
                    confidence=confidence)
 
 
-
-
 class NDDocumentEntity(NDBaseObject, ConfidenceMixin):
     name: str
     textSelections: List[DocumentTextSelection]
 
-
     def to_common(self) -> DocumentEntity:
         return TextEntity(name=self.name, textSelections=self.textSelections)
 
         return obj.from_common(annotation.value, subclasses, annotation.name,
                                annotation.feature_schema_id, annotation.extra,
                                data, **optional_kwargs)
+
     @classmethod
     def from_common(cls,
                     document_entity: DocumentEntity,
diff --git a/tests/data/annotation_types/test_ner.py b/tests/data/annotation_types/test_ner.py
index 176851e1e6..768a6b3130 100644
--- a/tests/data/annotation_types/test_ner.py
+++ b/tests/data/annotation_types/test_ner.py
@@ -8,10 +8,17 @@ def test_ner():
     assert text_entity.start == start
     assert text_entity.end == end
 
+
 def test_document_entity():
-    document_entity = DocumentEntity(name="tool_name", textSelections=[DocumentTextSelection(tokenIds=["1", "2"], groupId="1", page=1)])
+    document_entity = DocumentEntity(name="tool_name",
+                                     textSelections=[
+                                         DocumentTextSelection(
+                                             tokenIds=["1", "2"],
+                                             groupId="1",
+                                             page=1)
+                                     ])
 
     assert document_entity.name == "tool_name"
     assert document_entity.textSelections[0].tokenIds == ["1", "2"]
     assert document_entity.textSelections[0].groupId == "1"
-    assert document_entity.textSelections[0].page == 1
\ No newline at end of file
+    assert document_entity.textSelections[0].page == 1
diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py
index 0ec52edbad..994e00efe3 100644
--- a/tests/integration/annotation_import/conftest.py
+++ b/tests/integration/annotation_import/conftest.py
@@ -114,7 +114,13 @@ def ontology():
             'options': []
         }]
     }
-    named_entity = {'tool': 'named-entity', 'name': 'named-entity', 'required': False, 'color': '#A30059', 'classifications': [], }
+    named_entity = {
+        'tool': 'named-entity',
+        'name': 'named-entity',
+        'required': False,
+        'color': '#A30059',
+        'classifications': [],
+    }
 
     tools = [
         bbox_tool, polygon_tool, polyline_tool, point_tool, entity_tool,
@@ -160,6 +166,7 @@ def configured_project_pdf(client, ontology, rand_gen, pdf_url):
     project.delete()
     dataset.delete()
 
+
 @pytest.fixture
 def dataset_pdf_entity(client, rand_gen, pdf_entity_row_data):
     dataset = client.create_dataset(name=rand_gen(str))
@@ -169,8 +176,10 @@ def dataset_pdf_entity(client, rand_gen, pdf_entity_row_data):
 
     yield dataset, data_row_ids
     dataset.delete()
 
+
 @pytest.fixture
-def configured_project_pdf_entity(client, ontology, rand_gen, dataset_pdf_entity):
+def configured_project_pdf_entity(client, ontology, rand_gen,
+                                  dataset_pdf_entity):
     project = client.create_project(name=rand_gen(str),
                                     queue_mode=QueueMode.Dataset)
@@ -185,6 +194,7 @@ def configured_project_pdf_entity(client, ontology, rand_gen, dataset_pdf_entity
     yield project
     project.delete()
 
+
 @pytest.fixture
 def configured_project_without_data_rows(client, configured_project, rand_gen):
     project = client.create_project(name=rand_gen(str),
diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py
index 55322e2ba4..ed2c7597b9 100644
--- a/tests/integration/annotation_import/test_bulk_import_request.py
+++ b/tests/integration/annotation_import/test_bulk_import_request.py
@@ -293,36 +293,36 @@ def test_pdf_mal_bbox(client, configured_project_pdf):
     assert import_annotations.errors == []
 
 
-def test_pdf_document_entity(client, configured_project_pdf_entity, dataset_pdf_entity):
+def test_pdf_document_entity(client, configured_project_pdf_entity,
+                             dataset_pdf_entity):
     # for content "Metal-insulator (MI) transitions have been one of the" in OCR JSON extract tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json
-    document_text_selection = DocumentTextSelection(groupId= "2f4336f4-a07e-4e0a-a9e1-5629b03b719b",
-                                                    tokenIds=["3f984bf3-1d61-44f5-b59a-9658a2e3440f",
-                                                              "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8",
-                                                              "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80",
-                                                              "87a43d32-af76-4a1d-b262-5c5f4d5ace3a",
-                                                              "e8606e8a-dfd9-4c49-a635-ad5c879c75d0",
-                                                              "67c7c19e-4654-425d-bf17-2adb8cf02c30",
-                                                              "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa",
-                                                              "b0e94071-2187-461e-8e76-96c58738a52c"],
-                                                    page=1)
-
-    entities_annotation_document_entity = DocumentEntity(name="named_entity",
-                                                         textSelections = [document_text_selection])
-    entities_annotation = ObjectAnnotation(name="named-entity",
-                                           value=entities_annotation_document_entity)
+    document_text_selection = DocumentTextSelection(
+        groupId="2f4336f4-a07e-4e0a-a9e1-5629b03b719b",
+        tokenIds=[
+            "3f984bf3-1d61-44f5-b59a-9658a2e3440f",
+            "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8",
+            "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80",
+            "87a43d32-af76-4a1d-b262-5c5f4d5ace3a",
+            "e8606e8a-dfd9-4c49-a635-ad5c879c75d0",
+            "67c7c19e-4654-425d-bf17-2adb8cf02c30",
+            "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa",
+            "b0e94071-2187-461e-8e76-96c58738a52c"
+        ],
+        page=1)
+
+    entities_annotation_document_entity = DocumentEntity(
+        name="named_entity", textSelections=[document_text_selection])
+    entities_annotation = ObjectAnnotation(
+        name="named-entity", value=entities_annotation_document_entity)
 
     labels = []
     _, data_row_uids = dataset_pdf_entity
     for data_row_uid in data_row_uids:
-        labels.append(Label(
-            data= TextData(
-                uid=data_row_uid),
-            annotations = [
-                entities_annotation,
-            ]
-        )
-        )
-
+        labels.append(
+            Label(data=TextData(uid=data_row_uid),
+                  annotations=[
+                      entities_annotation,
+                  ]))
 
     import_annotations = MALPredictionImport.create_from_objects(
         client=client,
@@ -331,4 +331,4 @@ def test_pdf_document_entity(client, configured_project_pdf_entity, dataset_pdf_
         predictions=labels)
     import_annotations.wait_until_done()
 
-    assert import_annotations.errors == []
\ No newline at end of file
+    assert import_annotations.errors == []
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index d4ff8eecb9..60bd494c2e 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -175,10 +175,14 @@ def image_url(client):
 def pdf_url(client):
     return client.upload_file('tests/assets/loremipsum.pdf')
 
+
 @pytest.fixture(scope="session")
 def pdf_entity_row_data(client):
-    pdf_url = client.upload_file('tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483.pdf')
-    text_layer_url = client.upload_file('tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json')
+    pdf_url = client.upload_file(
+        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483.pdf')
+    text_layer_url = client.upload_file(
+        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json'
+    )
 
     return {
         "row_data": {
@@ -186,7 +190,7 @@ def pdf_entity_row_data(client):
             "text_layer_url": text_layer_url
         },
         "global_key": str(uuid.uuid4())
-    }
+    }
 
 
 @pytest.fixture