diff --git a/samples/snippets/detect/README.rst b/samples/snippets/detect/README.rst index c0eb13fb..0f6f5003 100644 --- a/samples/snippets/detect/README.rst +++ b/samples/snippets/detect/README.rst @@ -81,7 +81,7 @@ To run this sample: $ python detect.py usage: detect.py [-h] - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} ... This application demonstrates how to perform basic operations with the @@ -94,12 +94,13 @@ To run this sample: python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg + python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. positional arguments: - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} faces Detects faces in an image. faces-uri Detects faces in the file located in Google Cloud Storage or the web. @@ -135,6 +136,7 @@ To run this sample: document Detects document features in an image. document-uri Detects document features in the file located in Google Cloud Storage. 
+ ocr-uri OCR with PDF/TIFF as source files on GCS optional arguments: -h, --help show this help message and exit diff --git a/samples/snippets/detect/detect.py b/samples/snippets/detect/detect.py index 074f4ccc..7a15430d 100644 --- a/samples/snippets/detect/detect.py +++ b/samples/snippets/detect/detect.py @@ -24,6 +24,8 @@ python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg +python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \ +gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. @@ -31,8 +33,11 @@ import argparse import io +import re +from google.cloud import storage from google.cloud import vision +from google.protobuf import json_format # [START def_detect_faces] @@ -636,6 +641,76 @@ def detect_document_uri(uri): # [END def_detect_document_uri] +# [START vision_async_detect_document_ocr] +def async_detect_document(gcs_source_uri, gcs_destination_uri): + """OCR with PDF/TIFF as source files on GCS""" + # Supported mime_types are: 'application/pdf' and 'image/tiff' + mime_type = 'application/pdf' + + # How many pages should be grouped into each json output file. 
+ batch_size = 2 + + client = vision.ImageAnnotatorClient() + + feature = vision.types.Feature( + type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION) + + gcs_source = vision.types.GcsSource(uri=gcs_source_uri) + input_config = vision.types.InputConfig( + gcs_source=gcs_source, mime_type=mime_type) + + gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri) + output_config = vision.types.OutputConfig( + gcs_destination=gcs_destination, batch_size=batch_size) + + async_request = vision.types.AsyncAnnotateFileRequest( + features=[feature], input_config=input_config, + output_config=output_config) + + operation = client.async_batch_annotate_files( + requests=[async_request]) + + print('Waiting for the operation to finish.') + operation.result(timeout=180) + + # Once the request has completed and the output has been + # written to GCS, we can list all the output files. + storage_client = storage.Client() + + match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri) + bucket_name = match.group(1) + prefix = match.group(2) + + bucket = storage_client.get_bucket(bucket_name=bucket_name) + + # List objects with the given prefix. + blob_list = list(bucket.list_blobs(prefix=prefix)) + print('Output files:') + for blob in blob_list: + print(blob.name) + + # Process the first output file from GCS. + # Since we specified batch_size=2, the first response contains + # the first two pages of the input file. + output = blob_list[0] + + json_string = output.download_as_string() + response = json_format.Parse( + json_string, vision.types.AnnotateFileResponse()) + + # The actual response for the first page of the input file. + first_page_response = response.responses[0] + annotation = first_page_response.full_text_annotation + + # Here we print the full text from the first page. 
+ # The response contains more information: + # annotation/pages/blocks/paragraphs/words/symbols + # including confidence scores and bounding boxes + print(u'Full text:\n{}'.format( + annotation.text)) +# [END vision_async_detect_document_ocr] + + def run_local(args): if args.command == 'faces': detect_faces(args.path) @@ -684,6 +759,8 @@ def run_uri(args): detect_document_uri(args.uri) elif args.command == 'web-geo-uri': web_entities_include_geo_results_uri(args.uri) + elif args.command == 'ocr-uri': + async_detect_document(args.uri, args.destination_uri) if __name__ == '__main__': @@ -785,9 +862,14 @@ def run_uri(args): 'document-uri', help=detect_document_uri.__doc__) document_uri_parser.add_argument('uri') + ocr_uri_parser = subparsers.add_parser( + 'ocr-uri', help=async_detect_document.__doc__) + ocr_uri_parser.add_argument('uri') + ocr_uri_parser.add_argument('destination_uri') + args = parser.parse_args() - if ('uri' in args.command): + if 'uri' in args.command: run_uri(args) else: run_local(args) diff --git a/samples/snippets/detect/detect_test.py b/samples/snippets/detect/detect_test.py index 0510d100..f298860b 100644 --- a/samples/snippets/detect/detect_test.py +++ b/samples/snippets/detect/detect_test.py @@ -14,9 +14,14 @@ import os +from google.cloud import storage + import detect BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] +OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT' +GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET) +GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX) def test_labels(capsys): @@ -271,3 +276,20 @@ def test_detect_crop_hints_http(capsys): detect.detect_crop_hints_uri(uri.format(BUCKET)) out, _ = capsys.readouterr() assert 'bounds: (0,0)' in out + + +def test_async_detect_document(capsys): + storage_client = storage.Client() + bucket = storage_client.get_bucket(BUCKET) + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + + detect.async_detect_document( + gcs_source_uri=GCS_SOURCE_URI, + 
gcs_destination_uri=GCS_DESTINATION_URI) + out, _ = capsys.readouterr() + + assert 'Hodge conjecture' in out + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3 + + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() diff --git a/samples/snippets/detect/requirements.txt b/samples/snippets/detect/requirements.txt index a6c769ce..c7b2576f 100644 --- a/samples/snippets/detect/requirements.txt +++ b/samples/snippets/detect/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-vision==0.31.0 +google-cloud-vision==0.32.0 google-cloud-storage==1.6.0