diff --git a/packages/google-cloud-documentai/.github/.OwlBot.lock.yaml b/packages/google-cloud-documentai/.github/.OwlBot.lock.yaml index 108063d4dee4..63bf76ea6567 100644 --- a/packages/google-cloud-documentai/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-documentai/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60 + digest: sha256:39ad8c0570e4f5d2d3124a509de4fe975e799e2b97e0f58aed88f8880d5a8b60 diff --git a/packages/google-cloud-documentai/samples/snippets/batch_process_documents_sample.py b/packages/google-cloud-documentai/samples/snippets/batch_process_documents_sample.py index 882170934416..d85948e721af 100644 --- a/packages/google-cloud-documentai/samples/snippets/batch_process_documents_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/batch_process_documents_sample.py @@ -63,9 +63,7 @@ def batch_process_documents( # Location can be 'us' or 'eu' name = f"projects/{project_id}/locations/{location}/processors/{processor_id}" request = documentai.types.document_processor_service.BatchProcessRequest( - name=name, - input_documents=input_config, - document_output_config=output_config, + name=name, input_documents=input_config, document_output_config=output_config, ) operation = client.batch_process_documents(request) diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_form_sample.py b/packages/google-cloud-documentai/samples/snippets/process_document_form_sample.py index f8e523272ba0..06e3d242c8c2 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_form_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_form_sample.py @@ -21,6 +21,7 @@ # processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console # file_path = '/path/to/local/pdf' + def process_document_form_sample( project_id: str, location: str, processor_id: str, file_path: str ): @@ -69,9 +70,9 @@ def process_document_form_sample( for table in page.tables: num_collumns = len(table.header_rows[0].cells) num_rows = len(table.body_rows) - print(f'Table with {num_collumns} columns and {num_rows} rows:') + print(f"Table with {num_collumns} columns and {num_rows} rows:") print_table_info(table, text) - print(f'Found {len(page.form_fields)} form fields:') + print(f"Found {len(page.form_fields)} form fields:") for field in page.form_fields: name = layout_to_text(field.field_name, text) value = layout_to_text(field.field_value, text) @@ -80,17 +81,17 @@ def process_document_form_sample( def print_table_info(table: dict, text: str) -> None: # Print header row - header_row_text = '' + header_row_text = "" for header_cell in table.header_rows[0].cells: header_cell_text = layout_to_text(header_cell.layout, text) - header_row_text += f'{repr(header_cell_text.strip())} | ' - print(f'Collumns: {header_row_text[:-3]}') + header_row_text += f"{repr(header_cell_text.strip())} | " + print(f"Collumns: {header_row_text[:-3]}") # Print first body row - body_row_text = '' + body_row_text = "" for body_cell in table.body_rows[0].cells: body_cell_text = layout_to_text(body_cell.layout, text) - body_row_text += f'{repr(body_cell_text.strip())} | ' - print(f'First row data: {body_row_text[:-3]}\n') + body_row_text += f"{repr(body_cell_text.strip())} | " + print(f"First row data: {body_row_text[:-3]}\n") def layout_to_text(layout: dict, text: str) -> str: diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_ocr_sample.py b/packages/google-cloud-documentai/samples/snippets/process_document_ocr_sample.py index 93cd61e87ef9..9271808085fd 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_ocr_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_ocr_sample.py @@ -21,6 +21,7 @@ # processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console # file_path = '/path/to/local/pdf' + def process_document_ocr_sample( project_id: str, location: str, processor_id: str, file_path: str ) -> None: @@ -78,7 +79,7 @@ def print_detected_langauges(detected_languages: dict) -> None: print(" Detected languages:") for lang in detected_languages: code = lang.language_code - conf_percent = '{:.1%}'.format(lang.confidence) + conf_percent = "{:.1%}".format(lang.confidence) print(f" {code} ({conf_percent} confidence)") diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample.py b/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample.py index ba78bc694a0d..ec92c82cbabb 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample.py @@ -21,6 +21,7 @@ # processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console # file_path = '/path/to/local/pdf' + def process_document_quality_sample( project_id: str, location: str, processor_id: str, file_path: str ): @@ -60,17 +61,18 @@ def process_document_quality_sample( # response. document = result.document for entity in document.entities: - conf_percent = '{:.1%}'.format(entity.confidence) - page_num = '' + conf_percent = "{:.1%}".format(entity.confidence) + page_num = "" try: page_num = str(int(entity.page_anchor.page_refs.page) + 1) except AttributeError: page_num = "1" - print(f'Page {page_num} has a quality score of {conf_percent}:') + print(f"Page {page_num} has a quality score of {conf_percent}:") for prop in entity.properties: - conf_percent = '{:.1%}'.format(prop.confidence) - print(f' * {prop.type_} score of {conf_percent}') + conf_percent = "{:.1%}".format(prop.confidence) + print(f" * {prop.type_} score of {conf_percent}") + # [END documentai_process_quality_document] diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample_test.py b/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample_test.py index 84d9d5e20a3c..b025756eeeb9 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample_test.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_quality_sample_test.py @@ -36,7 +36,7 @@ def test_process_documents(capsys): expected_strings = [ "Page 1 has a quality score of", "defect_blurry score of 9", - "defect_noisy" + "defect_noisy", ] for expected_string in expected_strings: assert expected_string in out diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_specialized_sample.py b/packages/google-cloud-documentai/samples/snippets/process_document_specialized_sample.py index dfae17aedd7b..e30b9517534f 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_specialized_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_specialized_sample.py @@ -21,6 +21,7 @@ # processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console # file_path = '/path/to/local/pdf' + def process_document_specialized_sample( project_id: str, location: str, processor_id: str, file_path: str ): @@ -62,7 +63,7 @@ def process_document_specialized_sample( # Please see the OCR and other samples for how to parse other data in the # response. document = result.document - print(f'Found {len(document.entities)} entities:') + print(f"Found {len(document.entities)} entities:") for entity in document.entities: # Fields detected. For a full list of fields for each processor see # the processor documentation: @@ -71,7 +72,8 @@ def process_document_specialized_sample( # some other value formats in addition to text are availible # e.g. dates: `entity.normalized_value.date_value.year` text_value = entity.text_anchor.content - conf_percent = '{:.1%}'.format(entity.confidence) - print(f' * {repr(key)}: {repr(text_value)}({conf_percent} confident)') + conf_percent = "{:.1%}".format(entity.confidence) + print(f" * {repr(key)}: {repr(text_value)}({conf_percent} confident)") + # [END documentai_process_specialized_document] diff --git a/packages/google-cloud-documentai/samples/snippets/process_document_splitter_sample.py b/packages/google-cloud-documentai/samples/snippets/process_document_splitter_sample.py index de63f609aed9..9560785424b7 100644 --- a/packages/google-cloud-documentai/samples/snippets/process_document_splitter_sample.py +++ b/packages/google-cloud-documentai/samples/snippets/process_document_splitter_sample.py @@ -21,11 +21,10 @@ # processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console # file_path = '/path/to/local/pdf' + def process_document_splitter_sample( - project_id: str, - location: str, - processor_id: str, - file_path: str): + project_id: str, location: str, processor_id: str, file_path: str +): from google.cloud import documentai_v1beta3 as documentai # You must set the api_endpoint if you use a location other than 'us', e.g.: @@ -61,26 +60,29 @@ def process_document_splitter_sample( # form elements, and entities please see other processors like the OCR, form, # and specalized processors. document = result.document - print(f'Found {len(document.entities)} subdocuments:') + print(f"Found {len(document.entities)} subdocuments:") for entity in document.entities: - conf_percent = '{:.1%}'.format(entity.confidence) + conf_percent = "{:.1%}".format(entity.confidence) pages_range = page_refs_to_string(entity.page_anchor.page_refs) # Print subdocument type information, if available try: doctype = entity.type - print(f'{conf_percent} confident that {pages_range} a "{doctype}" subdocument.') + print( + f'{conf_percent} confident that {pages_range} a "{doctype}" subdocument.' + ) except AttributeError: - print(f'{conf_percent} confident that {pages_range} a subdocument.') + print(f"{conf_percent} confident that {pages_range} a subdocument.") def page_refs_to_string(page_refs: dict) -> str: - ''' Converts a page ref to a string describing the page or page range.''' + """ Converts a page ref to a string describing the page or page range.""" if len(page_refs) == 1: num = str(int(page_refs[0].page) + 1) - return f'page {num} is' + return f"page {num} is" else: start = str(int(page_refs[0].page) + 1) end = str(int(page_refs[1].page) + 1) - return f'pages {start} to {end} are' + return f"pages {start} to {end} are" + # [END documentai_process_splitter_document]