Skip to content

Commit

Permalink
Formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
vbrodsky committed Mar 12, 2023
1 parent 8200151 commit 5a2c7ca
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 36 deletions.
1 change: 1 addition & 0 deletions labelbox/data/annotation_types/ner/document_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def validate_page(cls, v):
raise ValueError("Page must be greater than 1")
return v


class DocumentEntity(BaseModel):
""" Represents a text entity """
name: str
Expand Down
4 changes: 1 addition & 3 deletions labelbox/data/serialization/ndjson/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,19 +372,17 @@ def from_common(cls,
confidence=confidence)




class NDDocumentEntity(NDBaseObject, ConfidenceMixin):
name: str
textSelections: List[DocumentTextSelection]


def to_common(self) -> DocumentEntity:
    """Convert this NDJSON document entity into the common annotation type.

    Returns:
        A ``DocumentEntity`` built from this object's ``name`` and
        ``textSelections``.
    """
    # Construct the type the signature promises; building a ``TextEntity``
    # here contradicted the annotated return type.
    return DocumentEntity(name=self.name, textSelections=self.textSelections)

return obj.from_common(annotation.value, subclasses, annotation.name,
annotation.feature_schema_id, annotation.extra,
data, **optional_kwargs)

@classmethod
def from_common(cls,
document_entity: DocumentEntity,
Expand Down
11 changes: 9 additions & 2 deletions tests/data/annotation_types/test_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@ def test_ner():
assert text_entity.start == start
assert text_entity.end == end


def test_document_entity():
    """Check that a DocumentEntity exposes its name and text selection fields."""
    # One construction is enough: building the same entity twice with
    # identical arguments adds nothing to the test.
    document_entity = DocumentEntity(name="tool_name",
                                     textSelections=[
                                         DocumentTextSelection(
                                             tokenIds=["1", "2"],
                                             groupId="1",
                                             page=1)
                                     ])

    assert document_entity.name == "tool_name"
    assert document_entity.textSelections[0].tokenIds == ["1", "2"]
    assert document_entity.textSelections[0].groupId == "1"
    assert document_entity.textSelections[0].page == 1
14 changes: 12 additions & 2 deletions tests/integration/annotation_import/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,13 @@ def ontology():
'options': []
}]
}
named_entity = {'tool': 'named-entity', 'name': 'named-entity', 'required': False, 'color': '#A30059', 'classifications': [], }
named_entity = {
'tool': 'named-entity',
'name': 'named-entity',
'required': False,
'color': '#A30059',
'classifications': [],
}

tools = [
bbox_tool, polygon_tool, polyline_tool, point_tool, entity_tool,
Expand Down Expand Up @@ -160,6 +166,7 @@ def configured_project_pdf(client, ontology, rand_gen, pdf_url):
project.delete()
dataset.delete()


@pytest.fixture
def dataset_pdf_entity(client, rand_gen, pdf_entity_row_data):
dataset = client.create_dataset(name=rand_gen(str))
Expand All @@ -169,8 +176,10 @@ def dataset_pdf_entity(client, rand_gen, pdf_entity_row_data):
yield dataset, data_row_ids
dataset.delete()


@pytest.fixture
def configured_project_pdf_entity(client, ontology, rand_gen, dataset_pdf_entity):
def configured_project_pdf_entity(client, ontology, rand_gen,
dataset_pdf_entity):
project = client.create_project(name=rand_gen(str),
queue_mode=QueueMode.Dataset)

Expand All @@ -185,6 +194,7 @@ def configured_project_pdf_entity(client, ontology, rand_gen, dataset_pdf_entity
yield project
project.delete()


@pytest.fixture
def configured_project_without_data_rows(client, configured_project, rand_gen):
project = client.create_project(name=rand_gen(str),
Expand Down
52 changes: 26 additions & 26 deletions tests/integration/annotation_import/test_bulk_import_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,36 +293,36 @@ def test_pdf_mal_bbox(client, configured_project_pdf):
assert import_annotations.errors == []


def test_pdf_document_entity(client, configured_project_pdf_entity, dataset_pdf_entity):
def test_pdf_document_entity(client, configured_project_pdf_entity,
dataset_pdf_entity):
# for content "Metal-insulator (MI) transitions have been one of the" in OCR JSON extract tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json
document_text_selection = DocumentTextSelection(groupId= "2f4336f4-a07e-4e0a-a9e1-5629b03b719b",
tokenIds=["3f984bf3-1d61-44f5-b59a-9658a2e3440f",
"3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8",
"6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80",
"87a43d32-af76-4a1d-b262-5c5f4d5ace3a",
"e8606e8a-dfd9-4c49-a635-ad5c879c75d0",
"67c7c19e-4654-425d-bf17-2adb8cf02c30",
"149c5e80-3e07-49a7-ab2d-29ddfe6a38fa",
"b0e94071-2187-461e-8e76-96c58738a52c"],
page=1)

entities_annotation_document_entity = DocumentEntity(name="named_entity",
textSelections = [document_text_selection])
entities_annotation = ObjectAnnotation(name="named-entity",
value=entities_annotation_document_entity)
document_text_selection = DocumentTextSelection(
groupId="2f4336f4-a07e-4e0a-a9e1-5629b03b719b",
tokenIds=[
"3f984bf3-1d61-44f5-b59a-9658a2e3440f",
"3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8",
"6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80",
"87a43d32-af76-4a1d-b262-5c5f4d5ace3a",
"e8606e8a-dfd9-4c49-a635-ad5c879c75d0",
"67c7c19e-4654-425d-bf17-2adb8cf02c30",
"149c5e80-3e07-49a7-ab2d-29ddfe6a38fa",
"b0e94071-2187-461e-8e76-96c58738a52c"
],
page=1)

entities_annotation_document_entity = DocumentEntity(
name="named_entity", textSelections=[document_text_selection])
entities_annotation = ObjectAnnotation(
name="named-entity", value=entities_annotation_document_entity)

labels = []
_, data_row_uids = dataset_pdf_entity
for data_row_uid in data_row_uids:
labels.append(Label(
data= TextData(
uid=data_row_uid),
annotations = [
entities_annotation,
]
)
)

labels.append(
Label(data=TextData(uid=data_row_uid),
annotations=[
entities_annotation,
]))

import_annotations = MALPredictionImport.create_from_objects(
client=client,
Expand All @@ -331,4 +331,4 @@ def test_pdf_document_entity(client, configured_project_pdf_entity, dataset_pdf_
predictions=labels)
import_annotations.wait_until_done()

assert import_annotations.errors == []
assert import_annotations.errors == []
10 changes: 7 additions & 3 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,22 @@ def image_url(client):
def pdf_url(client):
    """Upload the lorem-ipsum sample PDF and return the upload result."""
    sample_pdf_path = 'tests/assets/loremipsum.pdf'
    uploaded = client.upload_file(sample_pdf_path)
    return uploaded


@pytest.fixture(scope="session")
def pdf_entity_row_data(client):
    """Upload the sample PDF and its text layer, then build a row payload.

    Returns:
        A dict with a ``row_data`` mapping that holds the two values
        returned by ``client.upload_file`` (``pdf_url`` and
        ``text_layer_url``) and a ``global_key`` generated with
        ``uuid.uuid4()``.
    """
    # Upload each asset exactly once; a second upload of the same file
    # would only overwrite the first result.
    pdf_url = client.upload_file(
        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483.pdf')
    text_layer_url = client.upload_file(
        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json'
    )

    return {
        "row_data": {
            "pdf_url": pdf_url,
            "text_layer_url": text_layer_url
        },
        "global_key": str(uuid.uuid4())
    }


@pytest.fixture
Expand Down

0 comments on commit 5a2c7ca

Please sign in to comment.