GoogleCloudPlatform · happyhuman · Mar 12, 2019 · Mar 7, 2019 · Mar 7, 2019 · Mar 7, 2019
diff --git a/vision/cloud-client/detect/detect.py b/vision/cloud-client/detect/detect.py
@@ -661,6 +661,95 @@ def detect_document_uri(uri):
 # [END vision_fulltext_detection_gcs]
 
 
+# [START vision_fulltext_detection_pdf]
+def detect_pdf_document(path):
+    """Detects document text in a local PDF/TIFF/GIF file and prints it.
+
+    Reads the file at ``path``, sends its bytes in one synchronous
+    ``batch_annotate_files`` request asking for DOCUMENT_TEXT_DETECTION on
+    pages 1, 2 and -1, and prints the confidence of every block, paragraph,
+    word and symbol found in each returned page annotation.
+
+    Args:
+        path: Path of the local file to annotate. Its content is sent with
+            the 'application/pdf' MIME type.
+
+    Raises:
+        Exception: If the response for any requested page carries an
+            error message.
+    """
+    from google.cloud import vision_v1p4beta1 as vision
+    client = vision.ImageAnnotatorClient()
+
+    with io.open(path, 'rb') as pdf_file:
+        content = pdf_file.read()
+
+    # Other accepted values include 'image/tiff' and 'image/gif'.
+    mime_type = 'application/pdf'
+    input_config = vision.types.InputConfig(
+        content=content, mime_type=mime_type)
+
+    feature = vision.types.Feature(
+        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+    image_context = vision.types.ImageContext()
+    # Annotate the first two pages and the last one (max 5 pages)
+    # First page starts at 1, and not 0. Last page is -1.
+    pages = [1, 2, -1]
+
+    online_one_request = vision.types.AnnotateFileRequest(
+        input_config=input_config,
+        features=[feature],
+        image_context=image_context,
+        pages=pages)
+
+    response = client.batch_annotate_files(requests=[online_one_request])
+
+    # Exactly one file request was sent, so only responses[0] is read; it
+    # holds one image response per annotated page.
+    for image_response in response.responses[0].responses:
+        # Per-page failures are reported through the `error` field; surface
+        # them instead of silently printing nothing for that page.
+        if image_response.error.message:
+            raise Exception(
+                'Failed to annotate page: {}'.format(
+                    image_response.error.message))
+
+        for page in image_response.full_text_annotation.pages:
+            for block in page.blocks:
+                print('\nBlock confidence: {}\n'.format(block.confidence))
+                for par in block.paragraphs:
+                    print('\tParagraph confidence: {}'.format(par.confidence))
+                    for word in par.words:
+                        symbol_texts = [symbol.text for symbol in word.symbols]
+                        word_text = ''.join(symbol_texts)
+                        print('\t\tWord text: {} (confidence: {})'.format(
+                            word_text, word.confidence))
+                        for symbol in word.symbols:
+                            print('\t\t\tSymbol: {} (confidence: {})'.format(
+                                symbol.text, symbol.confidence))
+# [END vision_fulltext_detection_pdf]
+
+
+# [START vision_fulltext_detection_pdf_gcs]
+def detect_pdf_document_from_gcs(gcs_uri):
+    """Detects document text in a PDF/TIFF/GIF file on GCS and prints it.
+
+    Points one synchronous ``batch_annotate_files`` request at ``gcs_uri``,
+    asking for DOCUMENT_TEXT_DETECTION on pages 1, 2 and -1, and prints the
+    confidence of every block, paragraph, word and symbol found in each
+    returned page annotation.
+
+    Args:
+        gcs_uri: URI of the source file, passed to the request as the
+            ``GcsSource`` uri. The file is declared with the
+            'application/pdf' MIME type.
+
+    Raises:
+        Exception: If the response for any requested page carries an
+            error message.
+    """
+    from google.cloud import vision_v1p4beta1 as vision
+    client = vision.ImageAnnotatorClient()
+
+    # Other accepted values include 'image/tiff' and 'image/gif'.
+    mime_type = 'application/pdf'
+    # The file is referenced by URI here; no bytes are uploaded inline.
+    gcs_input_config = vision.types.InputConfig(
+        gcs_source=vision.types.GcsSource(uri=gcs_uri), mime_type=mime_type)
+
+    feature = vision.types.Feature(
+        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+    image_context = vision.types.ImageContext()
+    # Annotate the first two pages and the last one (max 5 pages)
+    # First page starts at 1, and not 0. Last page is -1.
+    pages = [1, 2, -1]
+
+    online_one_request = vision.types.AnnotateFileRequest(
+        input_config=gcs_input_config,
+        features=[feature],
+        image_context=image_context,
+        pages=pages)
+
+    response = client.batch_annotate_files(requests=[online_one_request])
+
+    # Exactly one file request was sent, so only responses[0] is read; it
+    # holds one image response per annotated page.
+    for image_response in response.responses[0].responses:
+        # Per-page failures are reported through the `error` field; surface
+        # them instead of silently printing nothing for that page.
+        if image_response.error.message:
+            raise Exception(
+                'Failed to annotate page: {}'.format(
+                    image_response.error.message))
+
+        for page in image_response.full_text_annotation.pages:
+            for block in page.blocks:
+                print('\nBlock confidence: {}\n'.format(block.confidence))
+                for par in block.paragraphs:
+                    print('\tParagraph confidence: {}'.format(par.confidence))
+                    for word in par.words:
+                        symbol_texts = [symbol.text for symbol in word.symbols]
+                        word_text = ''.join(symbol_texts)
+                        print('\t\tWord text: {} (confidence: {})'.format(
+                            word_text, word.confidence))
+                        for symbol in word.symbols:
+                            print('\t\t\tSymbol: {} (confidence: {})'.format(
+                                symbol.text, symbol.confidence))
+# [END vision_fulltext_detection_pdf_gcs]
+
+
 # [START vision_text_detection_pdf_gcs]
 def async_detect_document(gcs_source_uri, gcs_destination_uri):
  """OCR with PDF/TIFF as source files on GCS"""

diff --git a/vision/cloud-client/detect/detect_test.py b/vision/cloud-client/detect/detect_test.py
@@ -252,6 +252,25 @@ def test_detect_document_uri(capsys):
  assert 'class' in out
 
 
+def test_detect_pdf_document(capsys):
+    """The local-file PDF sample prints word and symbol lines."""
+    # The sample PDF sits in resources/ next to this test module.
+    here = os.path.dirname(__file__)
+    pdf_path = os.path.join(here, 'resources/metamorphosis.pdf')
+
+    detect.detect_pdf_document(pdf_path)
+
+    printed, _ = capsys.readouterr()
+    for expected in ('Symbol', 'Word text'):
+        assert expected in printed
+
+
+def test_detect_pdf_document_from_gcs(capsys):
+    """The GCS PDF sample prints word and symbol lines."""
+    # Fill the asset bucket name into the object URI at call time.
+    uri = 'gs://{}/vision/document_understanding/metamorphosis.pdf'
+
+    detect.detect_pdf_document_from_gcs(uri.format(ASSET_BUCKET))
+
+    printed, _ = capsys.readouterr()
+    for expected in ('Symbol', 'Word text'):
+        assert expected in printed
+
+
 def test_detect_document_http(capsys):
  uri = 'https://storage-download.googleapis.com/{}/vision/text/screen.jpg'
  detect.detect_document_uri(uri.format(ASSET_BUCKET))

diff --git a/vision/cloud-client/detect/resources/metamorphosis.pdf b/vision/cloud-client/detect/resources/metamorphosis.pdf