From 81249dba88f95219e47eb463ab59a3448b498a6a Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Wed, 21 Mar 2018 13:48:32 -0700
Subject: [PATCH 01/12] add docpdf sample

---
 vision/cloud-client/detect/docpdf.py        | 89 +++++++++++++++++++++
 vision/cloud-client/detect/docpdf_test.py   | 36 +++++++++
 vision/cloud-client/detect/requirements.txt |  1 +
 3 files changed, 126 insertions(+)
 create mode 100644 vision/cloud-client/detect/docpdf.py
 create mode 100644 vision/cloud-client/detect/docpdf_test.py

diff --git a/vision/cloud-client/detect/docpdf.py b/vision/cloud-client/detect/docpdf.py
new file mode 100644
index 000000000000..69ec7e158e89
--- /dev/null
+++ b/vision/cloud-client/detect/docpdf.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""OCR with PDF/TIFF as source files on GCS
+
+Example:
+    python docpdf.py --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
+    --gcs-destination-uri gs://BUCKET_NAME/OCR/
+"""
+
+import argparse
+import json
+import re
+
+from google.cloud import vision_v1p2beta1 as vision
+from google.cloud import storage
+from google.protobuf import json_format
+
+
+def async_detect_document(gcs_source_uri, gcs_destination_uri):
+    # Supported mime_types are: 'application/pdf' and 'image/tiff'
+    mime_type = 'application/pdf'
+
+    # How many pages should be grouped into each json output file.
+    batch_size = 2
+
+    client = vision.ImageAnnotatorClient()
+
+    feature = vision.types.Feature(
+        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+
+    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
+    input_config = vision.types.InputConfig(
+        gcs_source=gcs_source, mime_type=mime_type)
+
+    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
+    output_config = vision.types.OutputConfig(gcs_destination=gcs_destination, batch_size=batch_size)
+
+    async_request = vision.types.AsyncAnnotateFileRequest(
+        features=[feature], input_config=input_config, output_config=output_config)
+
+    operation = client.async_batch_annotate_files(
+        requests=[async_request])
+
+    print('Waiting for the operation to finish.')
+    result = operation.result(90)
+
+    # Retrieve the first output file from GCS
+    storage_client = storage.Client()
+
+    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
+    bucket_name = match.group(1)
+    object_name = match.group(2) + 'output-1-to-2.json'
+
+    bucket = storage_client.get_bucket(bucket_name=bucket_name)
+    blob = bucket.blob(blob_name=object_name)
+
+    # Print the full text from the first page.
+    # The response additionally includes individual detected symbol's
+    # confidence and bounding box.
+    json_string = blob.download_as_string()
+    response = json.loads(json_string)
+
+    first_page = response['responses'][0]
+    print(first_page['fullTextAnnotation']['text'])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gcs-source-uri', required=True)
+    parser.add_argument('--gcs-destination-uri', required=True)
+
+    args = parser.parse_args()
+    async_detect_document(args.gcs_source_uri, args.gcs_destination_uri)
+
diff --git a/vision/cloud-client/detect/docpdf_test.py b/vision/cloud-client/detect/docpdf_test.py
new file mode 100644
index 000000000000..82d976054189
--- /dev/null
+++ b/vision/cloud-client/detect/docpdf_test.py
@@ -0,0 +1,36 @@
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from google.cloud import storage
+
+from docpdf import async_detect_document
+
+BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
+OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
+GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
+GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)
+
+
+def test_async_detect_document(capsys):
+    async_detect_document(gcs_source_uri=GCS_SOURCE_URI, gcs_destination_uri=GCS_DESTINATION_URI)
+    out, _ = capsys.readouterr()
+
+    assert 'Hodge conjecture' in out
+
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(BUCKET)
+    for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
+    	blob.delete()
diff --git a/vision/cloud-client/detect/requirements.txt b/vision/cloud-client/detect/requirements.txt
index 80c8a11ca3c2..ca9d0e400b84 100644
--- a/vision/cloud-client/detect/requirements.txt
+++ b/vision/cloud-client/detect/requirements.txt
@@ -1 +1,2 @@
 google-cloud-vision==0.30.1
+google-cloud-storage==1.6.0

From c375496350dd5a2c3298edc5fac134523bdcfceb Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Mon, 26 Mar 2018 16:48:55 -0700
Subject: [PATCH 02/12] import order

---
 vision/cloud-client/detect/docpdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vision/cloud-client/detect/docpdf.py b/vision/cloud-client/detect/docpdf.py
index 69ec7e158e89..8dedf46c8f27 100644
--- a/vision/cloud-client/detect/docpdf.py
+++ b/vision/cloud-client/detect/docpdf.py
@@ -26,8 +26,8 @@
 import json
 import re
 
-from google.cloud import vision_v1p2beta1 as vision
 from google.cloud import storage
+from google.cloud import vision_v1p2beta1 as vision
 from google.protobuf import json_format
 
 

From 1e3c9411c11973648f97347e8fc5ff71caac3a01 Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 27 Mar 2018 11:02:08 -0700
Subject: [PATCH 03/12] list blobs

---
 vision/cloud-client/detect/docpdf.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/vision/cloud-client/detect/docpdf.py b/vision/cloud-client/detect/docpdf.py
index 8dedf46c8f27..e03005856a4e 100644
--- a/vision/cloud-client/detect/docpdf.py
+++ b/vision/cloud-client/detect/docpdf.py
@@ -36,6 +36,7 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
     mime_type = 'application/pdf'
 
     # How many pages should be grouped into each json output file.
+    # With a file of 5 pages
     batch_size = 2
 
     client = vision.ImageAnnotatorClient()
@@ -57,22 +58,29 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
         requests=[async_request])
 
     print('Waiting for the operation to finish.')
-    result = operation.result(90)
+    result = operation.result(timeout=90)
 
-    # Retrieve the first output file from GCS
+    # Once the request has completed and the output has been
+    # written to GCS, we can list all the output files.
     storage_client = storage.Client()
 
     match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
     bucket_name = match.group(1)
-    object_name = match.group(2) + 'output-1-to-2.json'
+    prefix = match.group(2)
 
     bucket = storage_client.get_bucket(bucket_name=bucket_name)
-    blob = bucket.blob(blob_name=object_name)
+
+    # List objects with the given prefix.
+    blob_list = list(bucket.list_blobs(prefix=prefix))
+    print(blob_list)
+
+    #Retrieve the first output file from GCS.
+    first_output = blob_list[0]
 
     # Print the full text from the first page.
     # The response additionally includes individual detected symbol's
     # confidence and bounding box.
-    json_string = blob.download_as_string()
+    json_string = first_output.download_as_string()
     response = json.loads(json_string)
 
     first_page = response['responses'][0]

From a2cd808eb9cc544d0e1e9d834bb259348d2870fc Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 27 Mar 2018 11:05:35 -0700
Subject: [PATCH 04/12] filename change

---
 vision/cloud-client/detect/docpdf.py      | 97 -----------------------
 vision/cloud-client/detect/docpdf_test.py | 36 ---------
 2 files changed, 133 deletions(-)
 delete mode 100644 vision/cloud-client/detect/docpdf.py
 delete mode 100644 vision/cloud-client/detect/docpdf_test.py

diff --git a/vision/cloud-client/detect/docpdf.py b/vision/cloud-client/detect/docpdf.py
deleted file mode 100644
index e03005856a4e..000000000000
--- a/vision/cloud-client/detect/docpdf.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2018 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""OCR with PDF/TIFF as source files on GCS
-
-Example:
-    python docpdf.py --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
-    --gcs-destination-uri gs://BUCKET_NAME/OCR/
-"""
-
-import argparse
-import json
-import re
-
-from google.cloud import storage
-from google.cloud import vision_v1p2beta1 as vision
-from google.protobuf import json_format
-
-
-def async_detect_document(gcs_source_uri, gcs_destination_uri):
-    # Supported mime_types are: 'application/pdf' and 'image/tiff'
-    mime_type = 'application/pdf'
-
-    # How many pages should be grouped into each json output file.
-    # With a file of 5 pages
-    batch_size = 2
-
-    client = vision.ImageAnnotatorClient()
-
-    feature = vision.types.Feature(
-        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
-
-    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
-    input_config = vision.types.InputConfig(
-        gcs_source=gcs_source, mime_type=mime_type)
-
-    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
-    output_config = vision.types.OutputConfig(gcs_destination=gcs_destination, batch_size=batch_size)
-
-    async_request = vision.types.AsyncAnnotateFileRequest(
-        features=[feature], input_config=input_config, output_config=output_config)
-
-    operation = client.async_batch_annotate_files(
-        requests=[async_request])
-
-    print('Waiting for the operation to finish.')
-    result = operation.result(timeout=90)
-
-    # Once the request has completed and the output has been
-    # written to GCS, we can list all the output files.
-    storage_client = storage.Client()
-
-    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
-    bucket_name = match.group(1)
-    prefix = match.group(2)
-
-    bucket = storage_client.get_bucket(bucket_name=bucket_name)
-
-    # List objects with the given prefix.
-    blob_list = list(bucket.list_blobs(prefix=prefix))
-    print(blob_list)
-
-    #Retrieve the first output file from GCS.
-    first_output = blob_list[0]
-
-    # Print the full text from the first page.
-    # The response additionally includes individual detected symbol's
-    # confidence and bounding box.
-    json_string = first_output.download_as_string()
-    response = json.loads(json_string)
-
-    first_page = response['responses'][0]
-    print(first_page['fullTextAnnotation']['text'])
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--gcs-source-uri', required=True)
-    parser.add_argument('--gcs-destination-uri', required=True)
-
-    args = parser.parse_args()
-    async_detect_document(args.gcs_source_uri, args.gcs_destination_uri)
-
diff --git a/vision/cloud-client/detect/docpdf_test.py b/vision/cloud-client/detect/docpdf_test.py
deleted file mode 100644
index 82d976054189..000000000000
--- a/vision/cloud-client/detect/docpdf_test.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2018 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from google.cloud import storage
-
-from docpdf import async_detect_document
-
-BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
-OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
-GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
-GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)
-
-
-def test_async_detect_document(capsys):
-    async_detect_document(gcs_source_uri=GCS_SOURCE_URI, gcs_destination_uri=GCS_DESTINATION_URI)
-    out, _ = capsys.readouterr()
-
-    assert 'Hodge conjecture' in out
-
-    storage_client = storage.Client()
-    bucket = storage_client.get_bucket(BUCKET)
-    for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
-    	blob.delete()

From c3fa66448e0f26865e5ffd07077397124874cb8a Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 27 Mar 2018 11:22:07 -0700
Subject: [PATCH 05/12] add the renamed files

---
 vision/cloud-client/detect/detect_pdf.py      | 97 +++++++++++++++++++
 vision/cloud-client/detect/detect_pdf_test.py | 36 +++++++
 2 files changed, 133 insertions(+)
 create mode 100644 vision/cloud-client/detect/detect_pdf.py
 create mode 100644 vision/cloud-client/detect/detect_pdf_test.py

diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py
new file mode 100644
index 000000000000..e03005856a4e
--- /dev/null
+++ b/vision/cloud-client/detect/detect_pdf.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""OCR with PDF/TIFF as source files on GCS
+
+Example:
+    python docpdf.py --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
+    --gcs-destination-uri gs://BUCKET_NAME/OCR/
+"""
+
+import argparse
+import json
+import re
+
+from google.cloud import storage
+from google.cloud import vision_v1p2beta1 as vision
+from google.protobuf import json_format
+
+
+def async_detect_document(gcs_source_uri, gcs_destination_uri):
+    # Supported mime_types are: 'application/pdf' and 'image/tiff'
+    mime_type = 'application/pdf'
+
+    # How many pages should be grouped into each json output file.
+    # For example, a 5-page file with batch_size = 2 yields 3 output files.
+    batch_size = 2
+
+    client = vision.ImageAnnotatorClient()
+
+    feature = vision.types.Feature(
+        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+
+    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
+    input_config = vision.types.InputConfig(
+        gcs_source=gcs_source, mime_type=mime_type)
+
+    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
+    output_config = vision.types.OutputConfig(gcs_destination=gcs_destination, batch_size=batch_size)
+
+    async_request = vision.types.AsyncAnnotateFileRequest(
+        features=[feature], input_config=input_config, output_config=output_config)
+
+    operation = client.async_batch_annotate_files(
+        requests=[async_request])
+
+    print('Waiting for the operation to finish.')
+    result = operation.result(timeout=90)
+
+    # Once the request has completed and the output has been
+    # written to GCS, we can list all the output files.
+    storage_client = storage.Client()
+
+    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
+    bucket_name = match.group(1)
+    prefix = match.group(2)
+
+    bucket = storage_client.get_bucket(bucket_name=bucket_name)
+
+    # List objects with the given prefix.
+    blob_list = list(bucket.list_blobs(prefix=prefix))
+    print(blob_list)
+
+    #Retrieve the first output file from GCS.
+    first_output = blob_list[0]
+
+    # Print the full text from the first page.
+    # The response additionally includes individual detected symbol's
+    # confidence and bounding box.
+    json_string = first_output.download_as_string()
+    response = json.loads(json_string)
+
+    first_page = response['responses'][0]
+    print(first_page['fullTextAnnotation']['text'])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gcs-source-uri', required=True)
+    parser.add_argument('--gcs-destination-uri', required=True)
+
+    args = parser.parse_args()
+    async_detect_document(args.gcs_source_uri, args.gcs_destination_uri)
+
diff --git a/vision/cloud-client/detect/detect_pdf_test.py b/vision/cloud-client/detect/detect_pdf_test.py
new file mode 100644
index 000000000000..1ba079283b6b
--- /dev/null
+++ b/vision/cloud-client/detect/detect_pdf_test.py
@@ -0,0 +1,36 @@
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from google.cloud import storage
+
+from detect_pdf import async_detect_document
+
+BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
+OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
+GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
+GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)
+
+
+def test_async_detect_document(capsys):
+    async_detect_document(gcs_source_uri=GCS_SOURCE_URI, gcs_destination_uri=GCS_DESTINATION_URI)
+    out, _ = capsys.readouterr()
+
+    assert 'Hodge conjecture' in out
+
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(BUCKET)
+    for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
+    	blob.delete()

From 8d4274cf2d66dff182659ed2471f470223ba3160 Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Thu, 29 Mar 2018 10:38:30 -0700
Subject: [PATCH 06/12] parse json string to AnnotateFileResponse message

---
 vision/cloud-client/detect/detect_pdf.py | 38 ++++++++++++++++--------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py
index e03005856a4e..fa0bc793d7b2 100644
--- a/vision/cloud-client/detect/detect_pdf.py
+++ b/vision/cloud-client/detect/detect_pdf.py
@@ -18,12 +18,12 @@
 """OCR with PDF/TIFF as source files on GCS
 
 Example:
-    python docpdf.py --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
+    python detect_pdf.py \
+    --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
     --gcs-destination-uri gs://BUCKET_NAME/OCR/
 """
 
 import argparse
-import json
 import re
 
 from google.cloud import storage
@@ -31,6 +31,7 @@
 from google.protobuf import json_format
 
 
+# [START vision_async_detect_document_ocr]
 def async_detect_document(gcs_source_uri, gcs_destination_uri):
     # Supported mime_types are: 'application/pdf' and 'image/tiff'
     mime_type = 'application/pdf'
@@ -49,16 +50,18 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
         gcs_source=gcs_source, mime_type=mime_type)
 
     gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
-    output_config = vision.types.OutputConfig(gcs_destination=gcs_destination, batch_size=batch_size)
+    output_config = vision.types.OutputConfig(
+        gcs_destination=gcs_destination, batch_size=batch_size)
 
     async_request = vision.types.AsyncAnnotateFileRequest(
-        features=[feature], input_config=input_config, output_config=output_config)
+        features=[feature], input_config=input_config,
+        output_config=output_config)
 
     operation = client.async_batch_annotate_files(
         requests=[async_request])
 
     print('Waiting for the operation to finish.')
-    result = operation.result(timeout=90)
+    operation.result(timeout=90)
 
     # Once the request has completed and the output has been
     # written to GCS, we can list all the output files.
@@ -72,19 +75,29 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
 
     # List objects with the given prefix.
     blob_list = list(bucket.list_blobs(prefix=prefix))
-    print(blob_list)
+    print('Output files:')
+    for blob in blob_list:
+        print(blob.name)
 
-    #Retrieve the first output file from GCS.
-    first_output = blob_list[0]
+    # Process the first output file from GCS.
+    # Since we specified batch_size=2, the first response contains
+    # the first two pages of the input file.
+    output = blob_list[0]
+
+    json_string = output.download_as_string()
+    response = json_format.Parse(
+        json_string, vision.types.AnnotateFileResponse())
+
+    # The actual response for the first page of the input file.
+    first_page_response = response.responses[0]
 
     # Print the full text from the first page.
     # The response additionally includes individual detected symbol's
     # confidence and bounding box.
-    json_string = first_output.download_as_string()
-    response = json.loads(json_string)
+    print(u'Full text:\n{}'.format(
+        first_page_response.full_text_annotation.text))
 
-    first_page = response['responses'][0]
-    print(first_page['fullTextAnnotation']['text'])
+# [END vision_async_detect_document_ocr]
 
 
 if __name__ == '__main__':
@@ -94,4 +107,3 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
 
     args = parser.parse_args()
     async_detect_document(args.gcs_source_uri, args.gcs_destination_uri)
-

From e6803e21bae170b372451c8815be6133a2fc8edd Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Thu, 29 Mar 2018 11:26:28 -0700
Subject: [PATCH 07/12] show more of the response

---
 vision/cloud-client/detect/detect_pdf.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py
index fa0bc793d7b2..344194dacc33 100644
--- a/vision/cloud-client/detect/detect_pdf.py
+++ b/vision/cloud-client/detect/detect_pdf.py
@@ -92,11 +92,29 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
     first_page_response = response.responses[0]
 
     # Print the full text from the first page.
-    # The response additionally includes individual detected symbol's
-    # confidence and bounding box.
     print(u'Full text:\n{}'.format(
         first_page_response.full_text_annotation.text))
 
+    # The response additionally includes individual detected symbol's
+    # confidence and bounding box.
+    for page in first_page_response.full_text_annotation.pages:
+        for block in page.blocks:
+            print('\nBlock confidence: {}\n'.format(block.confidence))
+
+            for paragraph in block.paragraphs:
+                print('Paragraph confidence: {}'.format(
+                    paragraph.confidence))
+
+                for word in paragraph.words:
+                    word_text = ''.join([
+                        symbol.text for symbol in word.symbols
+                    ])
+                    print(u'Word text: {} (confidence: {})'.format(
+                        word_text, word.confidence))
+
+                    for symbol in word.symbols:
+                        print(u'\tSymbol: {} (confidence: {})'.format(
+                            symbol.text, symbol.confidence))
 # [END vision_async_detect_document_ocr]
 
 

From da70df0d4c8f32a2c24e0359afa09c0423ec3005 Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Fri, 30 Mar 2018 23:25:15 -0700
Subject: [PATCH 08/12] simplify response processing to better focus on how to
 make the request

---
 vision/cloud-client/detect/detect_pdf.py | 31 ++++++------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py
index 344194dacc33..e48c15cdd64b 100644
--- a/vision/cloud-client/detect/detect_pdf.py
+++ b/vision/cloud-client/detect/detect_pdf.py
@@ -20,7 +20,7 @@
 Example:
     python detect_pdf.py \
     --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \
-    --gcs-destination-uri gs://BUCKET_NAME/OCR/
+    --gcs-destination-uri gs://BUCKET_NAME/PREFIX/
 """
 
 import argparse
@@ -90,31 +90,14 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
 
     # The actual response for the first page of the input file.
     first_page_response = response.responses[0]
+    annotation = first_page_response.full_text_annotation
 
-    # Print the full text from the first page.
+    # Here we print the full text from the first page.
+    # The response contains more information:
+    # annotation/pages/blocks/paragraphs/words/symbols
+    # including conficence score and bounding boxes
     print(u'Full text:\n{}'.format(
-        first_page_response.full_text_annotation.text))
-
-    # The response additionally includes individual detected symbol's
-    # confidence and bounding box.
-    for page in first_page_response.full_text_annotation.pages:
-        for block in page.blocks:
-            print('\nBlock confidence: {}\n'.format(block.confidence))
-
-            for paragraph in block.paragraphs:
-                print('Paragraph confidence: {}'.format(
-                    paragraph.confidence))
-
-                for word in paragraph.words:
-                    word_text = ''.join([
-                        symbol.text for symbol in word.symbols
-                    ])
-                    print(u'Word text: {} (confidence: {})'.format(
-                        word_text, word.confidence))
-
-                    for symbol in word.symbols:
-                        print(u'\tSymbol: {} (confidence: {})'.format(
-                            symbol.text, symbol.confidence))
+        annotation.text))
 # [END vision_async_detect_document_ocr]
 
 

From 18276361dab88ba0174c166c3b2a7ae79ace162e Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 3 Apr 2018 09:26:02 -0700
Subject: [PATCH 09/12] fix typo

---
 vision/cloud-client/detect/detect_pdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py
index e48c15cdd64b..a728d0522a58 100644
--- a/vision/cloud-client/detect/detect_pdf.py
+++ b/vision/cloud-client/detect/detect_pdf.py
@@ -95,7 +95,7 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri):
     # Here we print the full text from the first page.
     # The response contains more information:
     # annotation/pages/blocks/paragraphs/words/symbols
-    # including conficence score and bounding boxes
+    # including confidence scores and bounding boxes
     print(u'Full text:\n{}'.format(
         annotation.text))
 # [END vision_async_detect_document_ocr]

From fb688ce2cdd9b14e5844ee379a75208e9b10720a Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 3 Apr 2018 13:09:13 -0700
Subject: [PATCH 10/12] linter: wrap over-long async_detect_document call in test

---
 vision/cloud-client/detect/detect_pdf_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vision/cloud-client/detect/detect_pdf_test.py b/vision/cloud-client/detect/detect_pdf_test.py
index 1ba079283b6b..0401f0bd42d1 100644
--- a/vision/cloud-client/detect/detect_pdf_test.py
+++ b/vision/cloud-client/detect/detect_pdf_test.py
@@ -25,7 +25,8 @@
 
 
 def test_async_detect_document(capsys):
-    async_detect_document(gcs_source_uri=GCS_SOURCE_URI, gcs_destination_uri=GCS_DESTINATION_URI)
+    async_detect_document(gcs_source_uri=GCS_SOURCE_URI,
+    	gcs_destination_uri=GCS_DESTINATION_URI)
     out, _ = capsys.readouterr()
 
     assert 'Hodge conjecture' in out

From 2a1c5b3432c1f48e31a98a907a3cb07bef1395ea Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 3 Apr 2018 14:38:38 -0700
Subject: [PATCH 11/12] linter: replace tab indentation with spaces in test

---
 vision/cloud-client/detect/detect_pdf_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vision/cloud-client/detect/detect_pdf_test.py b/vision/cloud-client/detect/detect_pdf_test.py
index 0401f0bd42d1..66663ab4da7d 100644
--- a/vision/cloud-client/detect/detect_pdf_test.py
+++ b/vision/cloud-client/detect/detect_pdf_test.py
@@ -26,7 +26,7 @@
 
 def test_async_detect_document(capsys):
     async_detect_document(gcs_source_uri=GCS_SOURCE_URI,
-    	gcs_destination_uri=GCS_DESTINATION_URI)
+        gcs_destination_uri=GCS_DESTINATION_URI)
     out, _ = capsys.readouterr()
 
     assert 'Hodge conjecture' in out
@@ -34,4 +34,4 @@ def test_async_detect_document(capsys):
     storage_client = storage.Client()
     bucket = storage_client.get_bucket(BUCKET)
     for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
-    	blob.delete()
+        blob.delete()

From 39b19d120e007bdb9b23cfe1393939ae5ce3845f Mon Sep 17 00:00:00 2001
From: Yu-Han Liu <yuhanliu@google.com>
Date: Tue, 3 Apr 2018 16:35:03 -0700
Subject: [PATCH 12/12] linter: use hanging indent for test call arguments

---
 vision/cloud-client/detect/detect_pdf_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vision/cloud-client/detect/detect_pdf_test.py b/vision/cloud-client/detect/detect_pdf_test.py
index 66663ab4da7d..f0f0b5f7126d 100644
--- a/vision/cloud-client/detect/detect_pdf_test.py
+++ b/vision/cloud-client/detect/detect_pdf_test.py
@@ -25,7 +25,8 @@
 
 
 def test_async_detect_document(capsys):
-    async_detect_document(gcs_source_uri=GCS_SOURCE_URI,
+    async_detect_document(
+        gcs_source_uri=GCS_SOURCE_URI,
         gcs_destination_uri=GCS_DESTINATION_URI)
     out, _ = capsys.readouterr()