Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] assertions #17098

Merged
5 commits merged into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

- Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
exists on the `HealthcareEntity`
- Add property `assertion` to `HealthcareEntity`. This contains assertions about the entity itself, e.g. if the entity represents a diagnosis,
is this diagnosis conditional on a symptom?

## 5.1.0b5 (2021-02-10)

Expand Down
5 changes: 5 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,11 @@ for idx, doc in enumerate(docs):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@
HealthcareEntityRelationRoleType,
)
from ._paging import AnalyzeHealthcareEntitiesResult
from ._generated.v3_1_preview_4.models import RelationType as HealthcareEntityRelationType
from ._generated.v3_1_preview_4.models import (
RelationType as HealthcareEntityRelationType,
)

__all__ = [
'TextAnalyticsApiVersion',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,11 @@ class HealthcareEntity(DictMixin):
:ivar str category: Entity category, see the following link for health's named
entity types: https://aka.ms/text-analytics-health-entities
:ivar str subcategory: Entity subcategory.
:ivar assertion: Contains various assertions about this entity. For example, if
an entity is a diagnosis, is this diagnosis 'conditional' on a symptom?
Are the doctors 'certain' about this diagnosis? Is this diagnosis 'associated'
with another diagnosis?
:vartype assertion: ~azure.ai.textanalytics.HealthcareEntityAssertion
:ivar int length: The entity text length. This value depends on the value
of the `string_index_type` parameter specified in the original request, which is
UnicodeCodePoints by default.
Expand All @@ -515,18 +520,29 @@ def __init__(self, **kwargs):
self.normalized_text = kwargs.get("normalized_text", None)
self.category = kwargs.get("category", None)
self.subcategory = kwargs.get("subcategory", None)
self.assertion = kwargs.get("assertion", None)
self.length = kwargs.get("length", None)
self.offset = kwargs.get("offset", None)
self.confidence_score = kwargs.get("confidence_score", None)
self.data_sources = kwargs.get("data_sources", [])

@classmethod
def _from_generated(cls, healthcare_entity):
assertion = None
try:
if healthcare_entity.assertion:
assertion = HealthcareEntityAssertion._from_generated( # pylint: disable=protected-access
healthcare_entity.assertion
)
except AttributeError:
assertion = None

return cls(
text=healthcare_entity.text,
normalized_text=healthcare_entity.name,
category=healthcare_entity.category,
subcategory=healthcare_entity.subcategory,
assertion=assertion,
length=healthcare_entity.length,
offset=healthcare_entity.offset,
confidence_score=healthcare_entity.confidence_score,
Expand All @@ -539,18 +555,58 @@ def __hash__(self):
return hash(repr(self))

def __repr__(self):
return "HealthcareEntity(text={}, normalized_text={}, category={}, subcategory={}, length={}, offset={}, "\
"confidence_score={}, data_sources={})".format(
return "HealthcareEntity(text={}, normalized_text={}, category={}, subcategory={}, assertion={}, length={}, "\
"offset={}, confidence_score={}, data_sources={})".format(
self.text,
self.normalized_text,
self.category,
self.subcategory,
repr(self.assertion),
self.length,
self.offset,
self.confidence_score,
repr(self.data_sources),
)[:1024]

class HealthcareEntityAssertion(DictMixin):
"""Contains various assertions about a `HealthcareEntity`.

For example, if an entity is a diagnosis, is this diagnosis 'conditional' on a symptom?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like the docstring. I copied them for .NET :)

Are the doctors 'certain' about this diagnosis? Is this diagnosis 'associated'
with another diagnosis?

:ivar str conditionality: Describes whether the healthcare entity it's on is conditional on another entity.
For example, "If the patient has a fever, he has pneumonia", the diagnosis of pneumonia
is 'conditional' on whether the patient has a fever. Possible values are "hypothetical" and
"conditional".
:ivar str certainty: Describes how certain the healthcare entity it's on is. For example,
in "The patient may have a fever", the fever entity is not 100% certain, but is instead
"positivePossible". Possible values are "positive", "positivePossible", "neutralPossible",
"negativePossible", and "negative".
:ivar str association: Describes whether the healthcare entity it's on is the subject of the document, or
if this entity describes someone else in the document. For example, in "The subject's mother has
a fever", the "fever" entity is not associated with the subject themselves, but with the subject's
mother. Possible values are "subject" and "other".
"""

def __init__(self, **kwargs):
    """Populate the assertion fields from keyword arguments.

    ``conditionality``, ``certainty`` and ``association`` are each read
    from ``kwargs``; any that is not supplied is stored as ``None``.
    """
    # dict.get already falls back to None, so no explicit default is needed.
    self.conditionality = kwargs.get("conditionality")
    self.certainty = kwargs.get("certainty")
    self.association = kwargs.get("association")

@classmethod
def _from_generated(cls, healthcare_assertion):
    """Build an instance by copying the three assertion fields.

    :param healthcare_assertion: Object exposing ``conditionality``,
        ``certainty`` and ``association`` attributes.
    :return: A new instance of ``cls`` carrying those same values.
    """
    copied_fields = {
        "conditionality": healthcare_assertion.conditionality,
        "certainty": healthcare_assertion.certainty,
        "association": healthcare_assertion.association,
    }
    return cls(**copied_fields)

def __repr__(self):
    """Return a constructor-like string showing the three assertion fields."""
    template = "HealthcareEntityAssertion(conditionality={}, certainty={}, association={})"
    return template.format(self.conditionality, self.certainty, self.association)


class HealthcareEntityDataSource(DictMixin):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ async def analyze_healthcare_entities_async(self):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def analyze_healthcare_entities(self):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Baby not likely to have Meningitis.",
"language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '93'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
response:
body:
string: ''
headers:
apim-request-id:
- 982d8d11-7c5e-41cc-afb4-3a0a39cf7e5b
date:
- Thu, 04 Mar 2021 17:06:40 GMT
operation-location:
- https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '355'
status:
code: 202
message: Accepted
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
response:
body:
string: '{"jobId":"40590cb7-4fa0-4ce8-afed-19b16395bd65","lastUpdateDateTime":"2021-03-04T17:06:40Z","createdDateTime":"2021-03-04T17:06:40Z","expirationDateTime":"2021-03-05T17:06:40Z","status":"notStarted","errors":[]}'
headers:
apim-request-id:
- fb188517-edcf-402f-9d1e-3d1fe1c757e7
content-type:
- application/json; charset=utf-8
date:
- Thu, 04 Mar 2021 17:06:45 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '179'
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
response:
body:
string: '{"jobId":"40590cb7-4fa0-4ce8-afed-19b16395bd65","lastUpdateDateTime":"2021-03-04T17:06:48Z","createdDateTime":"2021-03-04T17:06:40Z","expirationDateTime":"2021-03-05T17:06:40Z","status":"succeeded","errors":[],"results":{"documents":[{"id":"0","entities":[{"offset":0,"length":4,"text":"Baby","category":"Age","confidenceScore":0.89,"links":[{"dataSource":"UMLS","id":"C0021270"},{"dataSource":"AOD","id":"0000005273"},{"dataSource":"CCPSS","id":"0030805"},{"dataSource":"CHV","id":"0000006675"},{"dataSource":"DXP","id":"U002089"},{"dataSource":"LCH","id":"U002421"},{"dataSource":"LCH_NW","id":"sh85066022"},{"dataSource":"LNC","id":"LA19747-7"},{"dataSource":"MDR","id":"10021731"},{"dataSource":"MSH","id":"D007223"},{"dataSource":"NCI","id":"C27956"},{"dataSource":"NCI_FDA","id":"C27956"},{"dataSource":"NCI_NICHD","id":"C27956"},{"dataSource":"SNOMEDCT_US","id":"133931009"}]},{"offset":24,"length":10,"text":"Meningitis","category":"Diagnosis","confidenceScore":1.0,"assertion":{"certainty":"negative"},"links":[{"dataSource":"UMLS","id":"C0025289"},{"dataSource":"AOD","id":"0000006185"},{"dataSource":"BI","id":"BI00546"},{"dataSource":"CCPSS","id":"1018016"},{"dataSource":"CCSR_10","id":"NVS001"},{"dataSource":"CHV","id":"0000007932"},{"dataSource":"COSTAR","id":"478"},{"dataSource":"CSP","id":"2042-5301"},{"dataSource":"CST","id":"MENINGITIS"},{"dataSource":"DXP","id":"U002543"},{"dataSource":"HPO","id":"HP:0001287"},{"dataSource":"ICD10","id":"G03.9"},{"dataSource":"ICD10AM","id":"G03.9"},{"dataSource":"ICD10CM","id":"G03.9"},{"dataSource":"ICD9CM","id":"322.9"},{"dataSource":"ICPC2ICD10ENG","id":"MTHU048434"},{"dataSource":"ICPC2P","id":"N71002"},{"dataSource":"LCH","id":"U002901"},{"dataSource":"LCH_NW","id":"sh85083562"},{"dataSource":"LNC","id":"LP20756-0"},{"dataSource":"MDR","id":"10027199"},{"dataSource":"MEDCIN","id":"31192"},{"dataSource":"MEDLINEPLUS","id":"324"},{"dataSource":"MSH","id":"D008581"},{"dataSource":"NANDA-I","id":"02899"},{"dataSource":"N
CI","id":"C26828"},{"dataSource":"NCI_CPTAC","id":"C26828"},{"dataSource":"NCI_CTCAE","id":"E11458"},{"dataSource":"NCI_FDA","id":"2389"},{"dataSource":"NCI_NCI-GLOSS","id":"CDR0000471780"},{"dataSource":"NCI_NICHD","id":"C26828"},{"dataSource":"OMIM","id":"MTHU005994"},{"dataSource":"PSY","id":"30660"},{"dataSource":"RCD","id":"X000H"},{"dataSource":"SNM","id":"M-40000"},{"dataSource":"SNMI","id":"DA-10010"},{"dataSource":"SNOMEDCT_US","id":"7180009"},{"dataSource":"WHO","id":"0955"}]}],"relations":[],"warnings":[]}],"errors":[],"modelVersion":"2021-01-11"}}'
headers:
apim-request-id:
- 30f0264a-61c1-4137-92bb-747ed9ca8e15
content-type:
- application/json; charset=utf-8
date:
- Thu, 04 Mar 2021 17:06:51 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '224'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Baby not likely to have Meningitis.",
"language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '93'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
response:
body:
string: ''
headers:
apim-request-id: 90f5a116-0d67-4ca0-bdf2-ead5e23bcca7
date: Thu, 04 Mar 2021 17:06:51 GMT
operation-location: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '188'
status:
code: 202
message: Accepted
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
- request:
body: null
headers:
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
response:
body:
string: '{"jobId":"e3f9abec-0960-4602-b4af-f91991dccb57","lastUpdateDateTime":"2021-03-04T17:06:52Z","createdDateTime":"2021-03-04T17:06:51Z","expirationDateTime":"2021-03-05T17:06:51Z","status":"succeeded","errors":[],"results":{"documents":[{"id":"0","entities":[{"offset":0,"length":4,"text":"Baby","category":"Age","confidenceScore":0.89,"links":[{"dataSource":"UMLS","id":"C0021270"},{"dataSource":"AOD","id":"0000005273"},{"dataSource":"CCPSS","id":"0030805"},{"dataSource":"CHV","id":"0000006675"},{"dataSource":"DXP","id":"U002089"},{"dataSource":"LCH","id":"U002421"},{"dataSource":"LCH_NW","id":"sh85066022"},{"dataSource":"LNC","id":"LA19747-7"},{"dataSource":"MDR","id":"10021731"},{"dataSource":"MSH","id":"D007223"},{"dataSource":"NCI","id":"C27956"},{"dataSource":"NCI_FDA","id":"C27956"},{"dataSource":"NCI_NICHD","id":"C27956"},{"dataSource":"SNOMEDCT_US","id":"133931009"}]},{"offset":24,"length":10,"text":"Meningitis","category":"Diagnosis","confidenceScore":1.0,"assertion":{"certainty":"negative"},"links":[{"dataSource":"UMLS","id":"C0025289"},{"dataSource":"AOD","id":"0000006185"},{"dataSource":"BI","id":"BI00546"},{"dataSource":"CCPSS","id":"1018016"},{"dataSource":"CCSR_10","id":"NVS001"},{"dataSource":"CHV","id":"0000007932"},{"dataSource":"COSTAR","id":"478"},{"dataSource":"CSP","id":"2042-5301"},{"dataSource":"CST","id":"MENINGITIS"},{"dataSource":"DXP","id":"U002543"},{"dataSource":"HPO","id":"HP:0001287"},{"dataSource":"ICD10","id":"G03.9"},{"dataSource":"ICD10AM","id":"G03.9"},{"dataSource":"ICD10CM","id":"G03.9"},{"dataSource":"ICD9CM","id":"322.9"},{"dataSource":"ICPC2ICD10ENG","id":"MTHU048434"},{"dataSource":"ICPC2P","id":"N71002"},{"dataSource":"LCH","id":"U002901"},{"dataSource":"LCH_NW","id":"sh85083562"},{"dataSource":"LNC","id":"LP20756-0"},{"dataSource":"MDR","id":"10027199"},{"dataSource":"MEDCIN","id":"31192"},{"dataSource":"MEDLINEPLUS","id":"324"},{"dataSource":"MSH","id":"D008581"},{"dataSource":"NANDA-I","id":"02899"},{"dataSource":"N
CI","id":"C26828"},{"dataSource":"NCI_CPTAC","id":"C26828"},{"dataSource":"NCI_CTCAE","id":"E11458"},{"dataSource":"NCI_FDA","id":"2389"},{"dataSource":"NCI_NCI-GLOSS","id":"CDR0000471780"},{"dataSource":"NCI_NICHD","id":"C26828"},{"dataSource":"OMIM","id":"MTHU005994"},{"dataSource":"PSY","id":"30660"},{"dataSource":"RCD","id":"X000H"},{"dataSource":"SNM","id":"M-40000"},{"dataSource":"SNMI","id":"DA-10010"},{"dataSource":"SNOMEDCT_US","id":"7180009"},{"dataSource":"WHO","id":"0955"}]}],"relations":[],"warnings":[]}],"errors":[],"modelVersion":"2021-01-11"}}'
headers:
apim-request-id: 40cc7792-e2e3-4722-beac-485880adb432
content-type: application/json; charset=utf-8
date: Thu, 04 Mar 2021 17:06:56 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '151'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,22 @@ def test_normalized_text(self, client):
polling_interval=self._interval(),
).result())

# currently just testing it has that attribute.
# have an issue to update https://github.com/Azure/azure-sdk-for-python/issues/17072

assert all([
e for e in result[0].entities if hasattr(e, "normalized_text")
])

histologically_entity = list(filter(lambda x: x.text == "histologically", result[0].entities))[0]
assert histologically_entity.normalized_text == "Histology Procedure"
assert histologically_entity.normalized_text == "Histology Procedure"

@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer()
def test_healthcare_assertion(self, client):
    """Check that assertion data returned by the service is surfaced on the entity.

    Sends a single document containing a negated diagnosis and verifies
    that the "Meningitis" entity carries an assertion whose certainty is
    "negative".
    """
    result = list(client.begin_analyze_healthcare_entities(
        documents=["Baby not likely to have Meningitis."],
        # Pass the same polling interval as test_normalized_text so both
        # healthcare tests poll the long-running operation the same way.
        # NOTE(review): presumably _interval() shortens polling during
        # playback -- confirm against the test base class.
        polling_interval=self._interval(),
    ).result())

    # currently can only test certainty
    # have an issue to update https://github.com/Azure/azure-sdk-for-python/issues/17088
    meningitis_entity = next(e for e in result[0].entities if e.text == "Meningitis")
    # Fail with a clear assertion (rather than an AttributeError on None)
    # when the entity comes back without any assertion attached.
    assert meningitis_entity.assertion is not None
    assert meningitis_entity.assertion.certainty == "negative"

Loading