Skip to content

Commit

Permalink
[text analytics] assertions (#17098)
Browse files Browse the repository at this point in the history
fixes #16498
  • Loading branch information
iscai-msft authored Mar 5, 2021
1 parent d03c270 commit c407bd1
Show file tree
Hide file tree
Showing 11 changed files with 289 additions and 15 deletions.
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

- Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
exists on `HealthcareEntity`.
- Add property `assertion` to `HealthcareEntity`. This contains assertions about the entity itself, e.g. if the entity represents a diagnosis,
is this diagnosis conditional on a symptom?

## 5.1.0b5 (2021-02-10)

Expand Down
5 changes: 5 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,11 @@ for idx, doc in enumerate(docs):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@
HealthcareEntityRelationRoleType,
)
from ._paging import AnalyzeHealthcareEntitiesResult
from ._generated.v3_1_preview_4.models import RelationType as HealthcareEntityRelationType
from ._generated.v3_1_preview_4.models import (
RelationType as HealthcareEntityRelationType,
)

__all__ = [
'TextAnalyticsApiVersion',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,11 @@ class HealthcareEntity(DictMixin):
:ivar str category: Entity category, see the following link for health's named
entity types: https://aka.ms/text-analytics-health-entities
:ivar str subcategory: Entity subcategory.
:ivar assertion: Contains various assertions about this entity. For example, if
an entity is a diagnosis, is this diagnosis 'conditional' on a symptom?
Are the doctors 'certain' about this diagnosis? Is this diagnosis 'associated'
with another diagnosis?
:vartype assertion: ~azure.ai.textanalytics.HealthcareEntityAssertion
:ivar int length: The entity text length. This value depends on the value
of the `string_index_type` parameter specified in the original request, which is
UnicodeCodePoints by default.
Expand All @@ -515,18 +520,29 @@ def __init__(self, **kwargs):
self.normalized_text = kwargs.get("normalized_text", None)
self.category = kwargs.get("category", None)
self.subcategory = kwargs.get("subcategory", None)
self.assertion = kwargs.get("assertion", None)
self.length = kwargs.get("length", None)
self.offset = kwargs.get("offset", None)
self.confidence_score = kwargs.get("confidence_score", None)
self.data_sources = kwargs.get("data_sources", [])

@classmethod
def _from_generated(cls, healthcare_entity):
assertion = None
try:
if healthcare_entity.assertion:
assertion = HealthcareEntityAssertion._from_generated( # pylint: disable=protected-access
healthcare_entity.assertion
)
except AttributeError:
assertion = None

return cls(
text=healthcare_entity.text,
normalized_text=healthcare_entity.name,
category=healthcare_entity.category,
subcategory=healthcare_entity.subcategory,
assertion=assertion,
length=healthcare_entity.length,
offset=healthcare_entity.offset,
confidence_score=healthcare_entity.confidence_score,
Expand All @@ -539,18 +555,58 @@ def __hash__(self):
return hash(repr(self))

def __repr__(self):
return "HealthcareEntity(text={}, normalized_text={}, category={}, subcategory={}, length={}, offset={}, "\
"confidence_score={}, data_sources={})".format(
return "HealthcareEntity(text={}, normalized_text={}, category={}, subcategory={}, assertion={}, length={}, "\
"offset={}, confidence_score={}, data_sources={})".format(
self.text,
self.normalized_text,
self.category,
self.subcategory,
repr(self.assertion),
self.length,
self.offset,
self.confidence_score,
repr(self.data_sources),
)[:1024]

class HealthcareEntityAssertion(DictMixin):
    """Assertions made about a single `HealthcareEntity`.

    Taking a diagnosis entity as an example: is the diagnosis 'conditional' on a symptom?
    Are the doctors 'certain' about the diagnosis? Is the diagnosis 'associated'
    with another diagnosis?

    :ivar str conditionality: Describes whether the healthcare entity it's on is conditional
        on another entity. For example, in "If the patient has a fever, he has pneumonia", the
        diagnosis of pneumonia is 'conditional' on whether the patient has a fever. Possible
        values are "hypothetical" and "conditional".
    :ivar str certainty: Describes how certain the healthcare entity it's on is. For example,
        in "The patient may have a fever", the fever entity is not 100% certain, but is instead
        "positivePossible". Possible values are "positive", "positivePossible",
        "neutralPossible", "negativePossible", and "negative".
    :ivar str association: Describes whether the healthcare entity it's on is the subject of
        the document, or if this entity describes someone else in the document. For example, in
        "The subject's mother has a fever", the "fever" entity is not associated with the subject
        themselves, but with the subject's mother. Possible values are "subject" and "other".
    """

    def __init__(self, **kwargs):
        # Every field is optional; a keyword that is not supplied is stored as None.
        self.conditionality = kwargs.get("conditionality")
        self.certainty = kwargs.get("certainty")
        self.association = kwargs.get("association")

    @classmethod
    def _from_generated(cls, healthcare_assertion):
        """Build an instance by copying the three assertion attributes of `healthcare_assertion`.

        :param healthcare_assertion: Object exposing `conditionality`, `certainty`
            and `association` attributes.
        :return: A new instance carrying the same three values.
        """
        copied_fields = {
            "conditionality": healthcare_assertion.conditionality,
            "certainty": healthcare_assertion.certainty,
            "association": healthcare_assertion.association,
        }
        return cls(**copied_fields)

    def __repr__(self):
        template = "HealthcareEntityAssertion(conditionality={}, certainty={}, association={})"
        return template.format(self.conditionality, self.certainty, self.association)


class HealthcareEntityDataSource(DictMixin):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ async def analyze_healthcare_entities_async(self):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def analyze_healthcare_entities(self):
for data_source in entity.data_sources:
print("......Entity ID: {}".format(data_source.entity_id))
print("......Name: {}".format(data_source.name))
if entity.assertion is not None:
print("...Assertion:")
print("......Conditionality: {}".format(entity.assertion.conditionality))
print("......Certainty: {}".format(entity.assertion.certainty))
print("......Association: {}".format(entity.assertion.association))
for relation in doc.entity_relations:
print("Relation of type: {} has the following roles".format(relation.relation_type))
for role in relation.roles:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Baby not likely to have Meningitis.",
"language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '93'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
response:
body:
string: ''
headers:
apim-request-id:
- 982d8d11-7c5e-41cc-afb4-3a0a39cf7e5b
date:
- Thu, 04 Mar 2021 17:06:40 GMT
operation-location:
- https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '355'
status:
code: 202
message: Accepted
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
response:
body:
string: '{"jobId":"40590cb7-4fa0-4ce8-afed-19b16395bd65","lastUpdateDateTime":"2021-03-04T17:06:40Z","createdDateTime":"2021-03-04T17:06:40Z","expirationDateTime":"2021-03-05T17:06:40Z","status":"notStarted","errors":[]}'
headers:
apim-request-id:
- fb188517-edcf-402f-9d1e-3d1fe1c757e7
content-type:
- application/json; charset=utf-8
date:
- Thu, 04 Mar 2021 17:06:45 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '179'
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/40590cb7-4fa0-4ce8-afed-19b16395bd65
response:
body:
string: '{"jobId":"40590cb7-4fa0-4ce8-afed-19b16395bd65","lastUpdateDateTime":"2021-03-04T17:06:48Z","createdDateTime":"2021-03-04T17:06:40Z","expirationDateTime":"2021-03-05T17:06:40Z","status":"succeeded","errors":[],"results":{"documents":[{"id":"0","entities":[{"offset":0,"length":4,"text":"Baby","category":"Age","confidenceScore":0.89,"links":[{"dataSource":"UMLS","id":"C0021270"},{"dataSource":"AOD","id":"0000005273"},{"dataSource":"CCPSS","id":"0030805"},{"dataSource":"CHV","id":"0000006675"},{"dataSource":"DXP","id":"U002089"},{"dataSource":"LCH","id":"U002421"},{"dataSource":"LCH_NW","id":"sh85066022"},{"dataSource":"LNC","id":"LA19747-7"},{"dataSource":"MDR","id":"10021731"},{"dataSource":"MSH","id":"D007223"},{"dataSource":"NCI","id":"C27956"},{"dataSource":"NCI_FDA","id":"C27956"},{"dataSource":"NCI_NICHD","id":"C27956"},{"dataSource":"SNOMEDCT_US","id":"133931009"}]},{"offset":24,"length":10,"text":"Meningitis","category":"Diagnosis","confidenceScore":1.0,"assertion":{"certainty":"negative"},"links":[{"dataSource":"UMLS","id":"C0025289"},{"dataSource":"AOD","id":"0000006185"},{"dataSource":"BI","id":"BI00546"},{"dataSource":"CCPSS","id":"1018016"},{"dataSource":"CCSR_10","id":"NVS001"},{"dataSource":"CHV","id":"0000007932"},{"dataSource":"COSTAR","id":"478"},{"dataSource":"CSP","id":"2042-5301"},{"dataSource":"CST","id":"MENINGITIS"},{"dataSource":"DXP","id":"U002543"},{"dataSource":"HPO","id":"HP:0001287"},{"dataSource":"ICD10","id":"G03.9"},{"dataSource":"ICD10AM","id":"G03.9"},{"dataSource":"ICD10CM","id":"G03.9"},{"dataSource":"ICD9CM","id":"322.9"},{"dataSource":"ICPC2ICD10ENG","id":"MTHU048434"},{"dataSource":"ICPC2P","id":"N71002"},{"dataSource":"LCH","id":"U002901"},{"dataSource":"LCH_NW","id":"sh85083562"},{"dataSource":"LNC","id":"LP20756-0"},{"dataSource":"MDR","id":"10027199"},{"dataSource":"MEDCIN","id":"31192"},{"dataSource":"MEDLINEPLUS","id":"324"},{"dataSource":"MSH","id":"D008581"},{"dataSource":"NANDA-I","id":"02899"},{"dataSource":"N
CI","id":"C26828"},{"dataSource":"NCI_CPTAC","id":"C26828"},{"dataSource":"NCI_CTCAE","id":"E11458"},{"dataSource":"NCI_FDA","id":"2389"},{"dataSource":"NCI_NCI-GLOSS","id":"CDR0000471780"},{"dataSource":"NCI_NICHD","id":"C26828"},{"dataSource":"OMIM","id":"MTHU005994"},{"dataSource":"PSY","id":"30660"},{"dataSource":"RCD","id":"X000H"},{"dataSource":"SNM","id":"M-40000"},{"dataSource":"SNMI","id":"DA-10010"},{"dataSource":"SNOMEDCT_US","id":"7180009"},{"dataSource":"WHO","id":"0955"}]}],"relations":[],"warnings":[]}],"errors":[],"modelVersion":"2021-01-11"}}'
headers:
apim-request-id:
- 30f0264a-61c1-4137-92bb-747ed9ca8e15
content-type:
- application/json; charset=utf-8
date:
- Thu, 04 Mar 2021 17:06:51 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '224'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Baby not likely to have Meningitis.",
"language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '93'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
response:
body:
string: ''
headers:
apim-request-id: 90f5a116-0d67-4ca0-bdf2-ead5e23bcca7
date: Thu, 04 Mar 2021 17:06:51 GMT
operation-location: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '188'
status:
code: 202
message: Accepted
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.4/entities/health/jobs?stringIndexType=UnicodeCodePoint
- request:
body: null
headers:
User-Agent:
- azsdk-python-ai-textanalytics/5.1.0b6 Python/3.9.1 (macOS-10.13.6-x86_64-i386-64bit)
method: GET
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
response:
body:
string: '{"jobId":"e3f9abec-0960-4602-b4af-f91991dccb57","lastUpdateDateTime":"2021-03-04T17:06:52Z","createdDateTime":"2021-03-04T17:06:51Z","expirationDateTime":"2021-03-05T17:06:51Z","status":"succeeded","errors":[],"results":{"documents":[{"id":"0","entities":[{"offset":0,"length":4,"text":"Baby","category":"Age","confidenceScore":0.89,"links":[{"dataSource":"UMLS","id":"C0021270"},{"dataSource":"AOD","id":"0000005273"},{"dataSource":"CCPSS","id":"0030805"},{"dataSource":"CHV","id":"0000006675"},{"dataSource":"DXP","id":"U002089"},{"dataSource":"LCH","id":"U002421"},{"dataSource":"LCH_NW","id":"sh85066022"},{"dataSource":"LNC","id":"LA19747-7"},{"dataSource":"MDR","id":"10021731"},{"dataSource":"MSH","id":"D007223"},{"dataSource":"NCI","id":"C27956"},{"dataSource":"NCI_FDA","id":"C27956"},{"dataSource":"NCI_NICHD","id":"C27956"},{"dataSource":"SNOMEDCT_US","id":"133931009"}]},{"offset":24,"length":10,"text":"Meningitis","category":"Diagnosis","confidenceScore":1.0,"assertion":{"certainty":"negative"},"links":[{"dataSource":"UMLS","id":"C0025289"},{"dataSource":"AOD","id":"0000006185"},{"dataSource":"BI","id":"BI00546"},{"dataSource":"CCPSS","id":"1018016"},{"dataSource":"CCSR_10","id":"NVS001"},{"dataSource":"CHV","id":"0000007932"},{"dataSource":"COSTAR","id":"478"},{"dataSource":"CSP","id":"2042-5301"},{"dataSource":"CST","id":"MENINGITIS"},{"dataSource":"DXP","id":"U002543"},{"dataSource":"HPO","id":"HP:0001287"},{"dataSource":"ICD10","id":"G03.9"},{"dataSource":"ICD10AM","id":"G03.9"},{"dataSource":"ICD10CM","id":"G03.9"},{"dataSource":"ICD9CM","id":"322.9"},{"dataSource":"ICPC2ICD10ENG","id":"MTHU048434"},{"dataSource":"ICPC2P","id":"N71002"},{"dataSource":"LCH","id":"U002901"},{"dataSource":"LCH_NW","id":"sh85083562"},{"dataSource":"LNC","id":"LP20756-0"},{"dataSource":"MDR","id":"10027199"},{"dataSource":"MEDCIN","id":"31192"},{"dataSource":"MEDLINEPLUS","id":"324"},{"dataSource":"MSH","id":"D008581"},{"dataSource":"NANDA-I","id":"02899"},{"dataSource":"N
CI","id":"C26828"},{"dataSource":"NCI_CPTAC","id":"C26828"},{"dataSource":"NCI_CTCAE","id":"E11458"},{"dataSource":"NCI_FDA","id":"2389"},{"dataSource":"NCI_NCI-GLOSS","id":"CDR0000471780"},{"dataSource":"NCI_NICHD","id":"C26828"},{"dataSource":"OMIM","id":"MTHU005994"},{"dataSource":"PSY","id":"30660"},{"dataSource":"RCD","id":"X000H"},{"dataSource":"SNM","id":"M-40000"},{"dataSource":"SNMI","id":"DA-10010"},{"dataSource":"SNOMEDCT_US","id":"7180009"},{"dataSource":"WHO","id":"0955"}]}],"relations":[],"warnings":[]}],"errors":[],"modelVersion":"2021-01-11"}}'
headers:
apim-request-id: 40cc7792-e2e3-4722-beac-485880adb432
content-type: application/json; charset=utf-8
date: Thu, 04 Mar 2021 17:06:56 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '151'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.4/entities/health/jobs/e3f9abec-0960-4602-b4af-f91991dccb57
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,22 @@ def test_normalized_text(self, client):
polling_interval=self._interval(),
).result())

# currently just testing it has that attribute.
# have an issue to update https://github.com/Azure/azure-sdk-for-python/issues/17072

assert all([
e for e in result[0].entities if hasattr(e, "normalized_text")
])

histologically_entity = list(filter(lambda x: x.text == "histologically", result[0].entities))[0]
assert histologically_entity.normalized_text == "Histology Procedure"
assert histologically_entity.normalized_text == "Histology Procedure"

@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer()
def test_healthcare_assertion(self, client):
    """Check that the `assertion` of a healthcare entity is surfaced on the result.

    Sends one document in which the diagnosis is negated and asserts that the
    "Meningitis" entity reports a certainty of "negative".
    """
    result = list(client.begin_analyze_healthcare_entities(
        documents=["Baby not likely to have Meningitis."],
        # Passed for consistency with the other healthcare tests in this file.
        # NOTE(review): presumably this shortens polling waits on recorded runs - confirm.
        polling_interval=self._interval(),
    ).result())

    # currently can only test certainty
    # have an issue to update https://github.com/Azure/azure-sdk-for-python/issues/17088
    meningitis_entity = next(e for e in result[0].entities if e.text == "Meningitis")
    assert meningitis_entity.assertion.certainty == "negative"

Loading

0 comments on commit c407bd1

Please sign in to comment.