Skip to content

Commit

Permalink
Added two samples for "OCR with PDF/TIFF as source files" [(#2034)](G…
Browse files Browse the repository at this point in the history
…oogleCloudPlatform/python-docs-samples#2034)

* Added two samples for "OCR with PDF/TIFF as source files"

* Moved the code to beta_snippets.py

* Fixed the sub-parser names.

* Shortened the line that was too long.

* Added newline at the end of the file

* Using the builtin open function instead

* Renamed a variable

* Fixed the wrong arg parameter

* Added extra comment lines

* Regenerated README.rst

* Added specific strings to be unit-tested
  • Loading branch information
happyhuman authored Mar 12, 2019
1 parent 8b3ee74 commit 1bc0992
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 4 deletions.
14 changes: 12 additions & 2 deletions samples/snippets/detect/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ To run this sample:
$ python beta_snippets.py
usage: beta_snippets.py [-h]
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri}
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri}
...
Google Cloud Vision API Python Beta Snippets
Expand All @@ -176,12 +176,14 @@ To run this sample:
python beta_snippets.py object-localization-uri gs://...
python beta_snippets.py handwritten-ocr INPUT_IMAGE
python beta_snippets.py handwritten-ocr-uri gs://...
python beta_snippets.py doc-features INPUT_PDF
python beta_snippets.py doc-features-uri gs://...
For more information, the documentation at
https://cloud.google.com/vision/docs.
positional arguments:
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri}
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri}
object-localization
Localize objects in the local image. Args: path: The
path to the local file.
Expand All @@ -195,6 +197,14 @@ To run this sample:
Detects handwritten characters in the file located in
Google Cloud Storage. Args: uri: The path to the file
in Google Cloud Storage (gs://...)
doc-features Detects document features in a PDF/TIFF/GIF file.
While your PDF file may have several pages, this API
can process up to 5 pages only. Args: path: The path
to the local file.
doc-features-uri Detects document features in a PDF/TIFF/GIF file.
While your PDF file may have several pages, this API
can process up to 5 pages only. Args: uri: The path to
the file in Google Cloud Storage (gs://...)
optional arguments:
-h, --help show this help message and exit
Expand Down
113 changes: 113 additions & 0 deletions samples/snippets/detect/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
python beta_snippets.py object-localization-uri gs://...
python beta_snippets.py handwritten-ocr INPUT_IMAGE
python beta_snippets.py handwritten-ocr-uri gs://...
python beta_snippets.py doc-features INPUT_PDF
python beta_snippets.py doc-features-uri gs://...
For more information, the documentation at
Expand Down Expand Up @@ -174,6 +176,105 @@ def detect_handwritten_ocr_uri(uri):
# [END vision_handwritten_ocr_gcs_beta]


# [START vision_fulltext_detection_pdf_beta]
def detect_document_features(path, mime_type='application/pdf'):
    """Detects document features in a local PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        path: The path to the local file.
        mime_type: MIME type of the file. One of 'application/pdf',
            'image/tiff' or 'image/gif'. Defaults to 'application/pdf'.
    """
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as pdf_file:
        content = pdf_file.read()

    input_config = vision.types.InputConfig(
        content=content, mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateImageResponse per requested page.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_beta]


# [START vision_fulltext_detection_pdf_gcs_beta]
def detect_document_features_uri(gcs_uri, mime_type='application/pdf'):
    """Detects document features in a PDF/TIFF/GIF file on Cloud Storage.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        gcs_uri: The path to the file in Google Cloud Storage (gs://...)
        mime_type: MIME type of the file. One of 'application/pdf',
            'image/tiff' or 'image/gif'. Defaults to 'application/pdf'.
    """
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    input_config = vision.types.InputConfig(
        gcs_source=vision.types.GcsSource(uri=gcs_uri), mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateImageResponse per requested page.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_gcs_beta]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -196,15 +297,27 @@ def detect_handwritten_ocr_uri(uri):
'handwritten-ocr-uri', help=detect_handwritten_ocr_uri.__doc__)
handwritten_uri_parser.add_argument('uri')

doc_features_parser = subparsers.add_parser(
    'doc-features', help=detect_document_features.__doc__)
doc_features_parser.add_argument('path')

doc_features_uri_parser = subparsers.add_parser(
    'doc-features-uri', help=detect_document_features_uri.__doc__)
doc_features_uri_parser.add_argument('uri')

args = parser.parse_args()

if 'uri' in args.command:
    if 'object-localization-uri' in args.command:
        localize_objects_uri(args.uri)
    elif 'handwritten-ocr-uri' in args.command:
        detect_handwritten_ocr_uri(args.uri)
    # Note: substring match — 'doc-features' matches 'doc-features-uri' here.
    elif 'doc-features' in args.command:
        # Fixed: previously dispatched to detect_handwritten_ocr_uri by mistake.
        detect_document_features_uri(args.uri)
else:
    if 'object-localization' in args.command:
        localize_objects(args.path)
    elif 'handwritten-ocr' in args.command:
        detect_handwritten_ocr(args.path)
    elif 'doc-features' in args.command:
        # Fixed: previously dispatched to detect_handwritten_ocr by mistake.
        detect_document_features(args.path)
21 changes: 19 additions & 2 deletions samples/snippets/detect/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import beta_snippets

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
GCS_ROOT = 'gs://cloud-samples-data/vision/'


def test_localize_objects(capsys):
Expand All @@ -28,7 +29,7 @@ def test_localize_objects(capsys):


def test_localize_objects_uri(capsys):
uri = 'gs://cloud-samples-data/vision/puppies.jpg'
uri = GCS_ROOT + 'puppies.jpg'

beta_snippets.localize_objects_uri(uri)

Expand All @@ -46,9 +47,25 @@ def test_handwritten_ocr(capsys):


def test_handwritten_ocr_uri(capsys):
uri = 'gs://cloud-samples-data/vision/handwritten.jpg'
uri = GCS_ROOT + 'handwritten.jpg'

beta_snippets.detect_handwritten_ocr_uri(uri)

out, _ = capsys.readouterr()
assert 'Cloud Vision API' in out


def test_detect_pdf_document(capsys):
    """OCR the bundled PDF fixture and spot-check printed words/symbols."""
    pdf_path = os.path.join(RESOURCES, 'kafka.pdf')
    beta_snippets.detect_document_features(pdf_path)
    output, _ = capsys.readouterr()
    assert 'Symbol: a' in output
    assert 'Word text: evenings' in output


def test_detect_pdf_document_from_gcs(capsys):
    """OCR a PDF hosted on GCS and check that some text was reported."""
    uri = GCS_ROOT + 'document_understanding/kafka.pdf'
    beta_snippets.detect_document_features_uri(uri)
    output, _ = capsys.readouterr()
    assert 'Symbol' in output
    assert 'Word text' in output
Binary file added samples/snippets/detect/resources/kafka.pdf
Binary file not shown.

0 comments on commit 1bc0992

Please sign in to comment.