diff --git a/vision/cloud-client/detect/README.rst b/vision/cloud-client/detect/README.rst index 8a077ab0406f..0ac276248299 100644 --- a/vision/cloud-client/detect/README.rst +++ b/vision/cloud-client/detect/README.rst @@ -165,7 +165,7 @@ To run this sample: $ python beta_snippets.py usage: beta_snippets.py [-h] - {object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri} + {object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri} ... Google Cloud Vision API Python Beta Snippets @@ -176,12 +176,14 @@ To run this sample: python beta_snippets.py object-localization-uri gs://... python beta_snippets.py handwritten-ocr INPUT_IMAGE python beta_snippets.py handwritten-ocr-uri gs://... + python beta_snippets.py doc-features INPUT_PDF + python beta_snippets.py doc-features-uri gs://... For more information, the documentation at https://cloud.google.com/vision/docs. positional arguments: - {object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri} + {object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri} object-localization Localize objects in the local image. Args: path: The path to the local file. @@ -195,6 +197,14 @@ To run this sample: Detects handwritten characters in the file located in Google Cloud Storage. Args: uri: The path to the file in Google Cloud Storage (gs://...) + doc-features Detects document features in a PDF/TIFF/GIF file. + While your PDF file may have several pages, this API + can process up to 5 pages only. Args: path: The path + to the local file. + doc-features-uri Detects document features in a PDF/TIFF/GIF file. + While your PDF file may have several pages, this API + can process up to 5 pages only. Args: uri: The path to + the file in Google Cloud Storage (gs://...) 
optional arguments: -h, --help show this help message and exit diff --git a/vision/cloud-client/detect/beta_snippets.py b/vision/cloud-client/detect/beta_snippets.py index 9b745a58a451..f035c1d5cd46 100644 --- a/vision/cloud-client/detect/beta_snippets.py +++ b/vision/cloud-client/detect/beta_snippets.py @@ -23,6 +23,8 @@ python beta_snippets.py object-localization-uri gs://... python beta_snippets.py handwritten-ocr INPUT_IMAGE python beta_snippets.py handwritten-ocr-uri gs://... +python beta_snippets.py doc-features INPUT_PDF +python beta_snippets.py doc-features-uri gs://... For more information, the documentation at @@ -174,6 +176,105 @@ def detect_handwritten_ocr_uri(uri): # [END vision_handwritten_ocr_gcs_beta] +# [START vision_fulltext_detection_pdf_beta] +def detect_document_features(path): + """Detects document features in a PDF/TIFF/GIF file. + + While your PDF file may have several pages, + this API can process up to 5 pages only. + + Args: + path: The path to the local file. + """ + from google.cloud import vision_v1p4beta1 as vision + client = vision.ImageAnnotatorClient() + + with open(path, 'rb') as pdf_file: + content = pdf_file.read() + + # Other supported mime_types: 'image/tiff' or 'image/gif' + mime_type = 'application/pdf' + input_config = vision.types.InputConfig( + content=content, mime_type=mime_type) + + feature = vision.types.Feature( + type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION) + # Annotate the first two pages and the last one (max 5 pages) + # First page starts at 1, and not 0. Last page is -1. 
+ pages = [1, 2, -1] + + request = vision.types.AnnotateFileRequest( + input_config=input_config, + features=[feature], + pages=pages) + + response = client.batch_annotate_files(requests=[request]) + + for image_response in response.responses[0].responses: + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + print('\nBlock confidence: {}\n'.format(block.confidence)) + for par in block.paragraphs: + print('\tParagraph confidence: {}'.format(par.confidence)) + for word in par.words: + symbol_texts = [symbol.text for symbol in word.symbols] + word_text = ''.join(symbol_texts) + print('\t\tWord text: {} (confidence: {})'.format( + word_text, word.confidence)) + for symbol in word.symbols: + print('\t\t\tSymbol: {} (confidence: {})'.format( + symbol.text, symbol.confidence)) +# [END vision_fulltext_detection_pdf_beta] + + +# [START vision_fulltext_detection_pdf_gcs_beta] +def detect_document_features_uri(gcs_uri): + """Detects document features in a PDF/TIFF/GIF file. + + While your PDF file may have several pages, + this API can process up to 5 pages only. + + Args: + uri: The path to the file in Google Cloud Storage (gs://...) + """ + from google.cloud import vision_v1p4beta1 as vision + client = vision.ImageAnnotatorClient() + + # Other supported mime_types: 'image/tiff' or 'image/gif' + mime_type = 'application/pdf' + input_config = vision.types.InputConfig( + gcs_source=vision.types.GcsSource(uri=gcs_uri), mime_type=mime_type) + + feature = vision.types.Feature( + type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION) + # Annotate the first two pages and the last one (max 5 pages) + # First page starts at 1, and not 0. Last page is -1. 
+ pages = [1, 2, -1] + + request = vision.types.AnnotateFileRequest( + input_config=input_config, + features=[feature], + pages=pages) + + response = client.batch_annotate_files(requests=[request]) + + for image_response in response.responses[0].responses: + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + print('\nBlock confidence: {}\n'.format(block.confidence)) + for par in block.paragraphs: + print('\tParagraph confidence: {}'.format(par.confidence)) + for word in par.words: + symbol_texts = [symbol.text for symbol in word.symbols] + word_text = ''.join(symbol_texts) + print('\t\tWord text: {} (confidence: {})'.format( + word_text, word.confidence)) + for symbol in word.symbols: + print('\t\t\tSymbol: {} (confidence: {})'.format( + symbol.text, symbol.confidence)) +# [END vision_fulltext_detection_pdf_gcs_beta] + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -196,6 +297,14 @@ def detect_handwritten_ocr_uri(uri): 'handwritten-ocr-uri', help=detect_handwritten_ocr_uri.__doc__) handwritten_uri_parser.add_argument('uri') + doc_features_parser = subparsers.add_parser( + 'doc-features', help=detect_document_features.__doc__) + doc_features_parser.add_argument('path') + + doc_features_uri_parser = subparsers.add_parser( + 'doc-features-uri', help=detect_document_features_uri.__doc__) + doc_features_uri_parser.add_argument('uri') + args = parser.parse_args() if 'uri' in args.command: @@ -203,8 +312,12 @@ def detect_handwritten_ocr_uri(uri): localize_objects_uri(args.uri) elif 'handwritten-ocr-uri' in args.command: detect_handwritten_ocr_uri(args.uri) + elif 'doc-features' in args.command: + detect_document_features_uri(args.uri) else: if 'object-localization' in args.command: localize_objects(args.path) elif 'handwritten-ocr' in args.command: detect_handwritten_ocr(args.path) + elif 'doc-features' in args.command: + detect_document_features(args.path) diff --git 
a/vision/cloud-client/detect/beta_snippets_test.py b/vision/cloud-client/detect/beta_snippets_test.py index 8b4c4078352e..6f5fd70f4f5a 100644 --- a/vision/cloud-client/detect/beta_snippets_test.py +++ b/vision/cloud-client/detect/beta_snippets_test.py @@ -16,6 +16,7 @@ import beta_snippets RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') +GCS_ROOT = 'gs://cloud-samples-data/vision/' def test_localize_objects(capsys): @@ -28,7 +29,7 @@ def test_localize_objects(capsys): def test_localize_objects_uri(capsys): - uri = 'gs://cloud-samples-data/vision/puppies.jpg' + uri = GCS_ROOT + 'puppies.jpg' beta_snippets.localize_objects_uri(uri) @@ -46,9 +47,25 @@ def test_handwritten_ocr(capsys): def test_handwritten_ocr_uri(capsys): - uri = 'gs://cloud-samples-data/vision/handwritten.jpg' + uri = GCS_ROOT + 'handwritten.jpg' beta_snippets.detect_handwritten_ocr_uri(uri) out, _ = capsys.readouterr() assert 'Cloud Vision API' in out + + +def test_detect_pdf_document(capsys): + file_name = os.path.join(RESOURCES, 'kafka.pdf') + beta_snippets.detect_document_features(file_name) + out, _ = capsys.readouterr() + assert 'Symbol: a' in out + assert 'Word text: evenings' in out + + +def test_detect_pdf_document_from_gcs(capsys): + gcs_uri = GCS_ROOT + 'document_understanding/kafka.pdf' + beta_snippets.detect_document_features_uri(gcs_uri) + out, _ = capsys.readouterr() + assert 'Symbol' in out + assert 'Word text' in out diff --git a/vision/cloud-client/detect/resources/kafka.pdf b/vision/cloud-client/detect/resources/kafka.pdf new file mode 100644 index 000000000000..ffa2e2fac2f4 Binary files /dev/null and b/vision/cloud-client/detect/resources/kafka.pdf differ