Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[formrecognizer] Add samples for converting to and from dictionary #21770

Merged
5 commits merged into from
Nov 24, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict_async.py

DESCRIPTION:
This sample demonstrates how to convert models returned from a recognize operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.

USAGE:
python sample_convert_to_and_from_dict_async.py

Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json
import asyncio

async def convert_to_and_from_dict_async():
    """Recognize an ID document and round-trip the result through dictionaries.

    The sample:
      1. sends the bundled driver-license image to the service with
         ``begin_recognize_identity_documents`` and awaits the result,
      2. converts every returned form to a dictionary via ``to_dict()``,
      3. writes the list of dictionaries to ``data.json`` (in the current
         working directory) using ``AzureJSONEncoder``,
      4. rebuilds the models with ``RecognizedForm.from_dict()`` and prints a
         few fields from the rebuilt models.

    Raises:
        KeyError: if AZURE_FORM_RECOGNIZER_ENDPOINT or
            AZURE_FORM_RECOGNIZER_KEY is not set in the environment.
    """
    # The sample image is located relative to this file's own path.
    path_to_sample_forms = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/id_documents/license.jpg",
        )
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import FormRecognizerClient
    from azure.ai.formrecognizer import RecognizedForm

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    # The client must stay open until the poller has produced its result.
    async with form_recognizer_client:
        with open(path_to_sample_forms, "rb") as f:
            poller = await form_recognizer_client.begin_recognize_identity_documents(identity_document=f)

        id_documents = await poller.result()

    # convert each received model to a dictionary (one dictionary per document)
    recognized_form_dict = [doc.to_dict() for doc in id_documents]

    # save the list of dictionaries as JSON content in a JSON file
    with open('data.json', 'w') as json_file:
        json.dump(recognized_form_dict, json_file, cls=AzureJSONEncoder)

    # convert the dictionaries back to the original models
    model = [RecognizedForm.from_dict(doc) for doc in recognized_form_dict]

    # (printed label, field key) pairs to report for every document
    fields_to_show = (
        ("First Name", "FirstName"),
        ("Last Name", "LastName"),
        ("Document Number", "DocumentNumber"),
    )

    # use the model as normal
    for idx, id_document in enumerate(model):
        print("--------Recognizing converted ID document #{}--------".format(idx+1))
        for label, field_name in fields_to_show:
            field = id_document.fields.get(field_name)
            # a field is skipped when it was not found on the document
            if field:
                print("{}: {} has confidence: {}".format(label, field.value, field.confidence))

    print("----------------------------------------")


async def main():
    """Entry-point coroutine: run the conversion sample once."""
    await convert_to_and_from_dict_async()


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() without a
    # running loop is deprecated and never closed the loop it created.
    asyncio.run(main())
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict.py

DESCRIPTION:
This sample demonstrates how to convert models returned from a recognize operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.

USAGE:
python sample_convert_to_and_from_dict.py

Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json

def convert_to_and_from_dict():
    """Recognize an ID document and round-trip the result through dictionaries.

    The sample:
      1. sends the bundled driver-license image to the service with
         ``begin_recognize_identity_documents`` and waits for the result,
      2. converts every returned form to a dictionary via ``to_dict()``,
      3. writes the list of dictionaries to ``data.json`` (in the current
         working directory) using ``AzureJSONEncoder``,
      4. rebuilds the models with ``RecognizedForm.from_dict()`` and prints a
         few fields from the rebuilt models.

    Raises:
        KeyError: if AZURE_FORM_RECOGNIZER_ENDPOINT or
            AZURE_FORM_RECOGNIZER_KEY is not set in the environment.
    """
    # The sample image is located relative to this file's own path.
    path_to_sample_forms = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "./sample_forms/id_documents/license.jpg",
        )
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient
    from azure.ai.formrecognizer import RecognizedForm

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    # Use the client as a context manager so it is closed once the result has
    # been received, mirroring the `async with` in the async sample.
    with form_recognizer_client:
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_identity_documents(identity_document=f)

        id_documents = poller.result()

    # convert each received model to a dictionary (one dictionary per document)
    recognized_form_dict = [doc.to_dict() for doc in id_documents]

    # save the list of dictionaries as JSON content in a JSON file
    with open('data.json', 'w') as json_file:
        json.dump(recognized_form_dict, json_file, cls=AzureJSONEncoder)

    # convert the dictionaries back to the original models
    model = [RecognizedForm.from_dict(doc) for doc in recognized_form_dict]

    # (printed label, field key) pairs to report for every document
    fields_to_show = (
        ("First Name", "FirstName"),
        ("Last Name", "LastName"),
        ("Document Number", "DocumentNumber"),
    )

    # use the model as normal
    for idx, id_document in enumerate(model):
        print("--------Recognizing converted ID document #{}--------".format(idx+1))
        for label, field_name in fields_to_show:
            field = id_document.fields.get(field_name)
            # a field is skipped when it was not found on the document
            if field:
                print("{}: {} has confidence: {}".format(label, field.value, field.confidence))

    print("----------------------------------------")


if __name__ == "__main__":
    # Run the sample only when this file is executed as a script.
    convert_to_and_from_dict()
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# coding: utf-8
catalinaperalta marked this conversation as resolved.
Show resolved Hide resolved

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict_async.py

DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.

USAGE:
python sample_convert_to_and_from_dict_async.py

Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json
import asyncio

async def convert_to_and_from_dict_async():
    """Analyze a sample form and round-trip the result through a dictionary.

    The sample image is analyzed with the "prebuilt-document" model, the
    returned ``AnalyzeResult`` is converted to a dictionary with ``to_dict()``,
    that dictionary is dumped to ``data.json`` using ``AzureJSONEncoder``, and
    finally ``AnalyzeResult.from_dict()`` rebuilds the model, from which a few
    attributes are printed.

    Raises:
        KeyError: if AZURE_FORM_RECOGNIZER_ENDPOINT or
            AZURE_FORM_RECOGNIZER_KEY is not set in the environment.
    """
    # Locate the sample image relative to this file's own path.
    this_file = os.path.abspath(__file__)
    sample_path = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    credential = AzureKeyCredential(key)
    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)

    # Keep the client open until the long-running operation has finished.
    async with document_analysis_client:
        with open(sample_path, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=document_stream
            )
        analyze_result = await poller.result()

    # model -> dictionary
    analyze_result_dict = analyze_result.to_dict()

    # dictionary -> JSON file
    with open('data.json', 'w') as json_file:
        json.dump(analyze_result_dict, json_file, cls=AzureJSONEncoder)

    # dictionary -> model
    restored = AnalyzeResult.from_dict(analyze_result_dict)

    # The restored model is used exactly like the one the service returned.
    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(restored.model_id))
    page_count = len(restored.pages)
    print("Number of pages analyzed {}".format(page_count))
    print("API version used: {}".format(restored.api_version))

    print("----------------------------------------")


async def main():
    """Entry-point coroutine: run the conversion sample once."""
    await convert_to_and_from_dict_async()


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() without a
    # running loop is deprecated and never closed the loop it created.
    asyncio.run(main())
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict.py

DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.

USAGE:
python sample_convert_to_and_from_dict.py

Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json

def convert_to_and_from_dict():
    """Analyze a sample form and round-trip the result through a dictionary.

    The sample image is analyzed with the "prebuilt-document" model, the
    returned ``AnalyzeResult`` is converted to a dictionary with ``to_dict()``,
    that dictionary is dumped to ``data.json`` (in the current working
    directory) using ``AzureJSONEncoder``, and finally
    ``AnalyzeResult.from_dict()`` rebuilds the model, from which a few
    attributes are printed.

    Raises:
        KeyError: if AZURE_FORM_RECOGNIZER_ENDPOINT or
            AZURE_FORM_RECOGNIZER_KEY is not set in the environment.
    """
    # The sample image is located relative to this file's own path.
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    # Use the client as a context manager so it is closed once the result has
    # been received, mirroring the `async with` in the async sample.
    with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=f
            )
        result = poller.result()

    # convert the received model to a dictionary
    analyze_result_dict = result.to_dict()

    # save the dictionary as JSON content in a JSON file
    with open('data.json', 'w') as json_file:
        json.dump(analyze_result_dict, json_file, cls=AzureJSONEncoder)

    # convert the dictionary back to the original model
    model = AnalyzeResult.from_dict(analyze_result_dict)

    # use the model as normal
    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(model.model_id))
    print("Number of pages analyzed {}".format(len(model.pages)))
    print("API version used: {}".format(model.api_version))

    print("----------------------------------------")


if __name__ == "__main__":
    # Run the sample only when this file is executed as a script.
    convert_to_and_from_dict()