[DI] Fix layout extracting issues (#35913)

Azure · Jun 26, 2024 · 15c50b7 · 15c50b7
1 parent 48590d5
commit 15c50b7
Showing 14 changed files with 268 additions and 80 deletions.
diff --git a/sdk/documentintelligence/azure-ai-documentintelligence/README.md b/sdk/documentintelligence/azure-ai-documentintelligence/README.md
@@ -237,30 +237,47 @@ for page in result.pages:
             words = get_words(page, line)
             print(
                 f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
-                f"within bounding polygon '{line.polygon}'"
+                f"within bounding polygon '{format_polygon(line.polygon)}'"
             )
 
-            for word in words:
-                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+    if page.words:
+        for word in page.words:
+            print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
     if page.selection_marks:
         for selection_mark in page.selection_marks:
             print(
                 f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
             )
 
+if result.paragraphs:
+    print(f"----Detected #{len(result.paragraphs)} paragraphs in the document----")
+    # Sort each paragraph's spans in place (list.sort() returns None), then order paragraphs by first span offset.
+    result.paragraphs.sort(key=lambda p: (p.spans.sort(key=lambda s: s.offset), p.spans[0].offset))
+    print("-----Print sorted paragraphs-----")
+    for paragraph in result.paragraphs:
+        print(
+            f"Found paragraph with role: '{paragraph.role}' within {format_bounding_region(paragraph.bounding_regions)} bounding region"
+        )
+        print(f"...with content: '{paragraph.content}'")
+        print(f"...with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}")
+
 if result.tables:
     for table_idx, table in enumerate(result.tables):
         print(f"Table # {table_idx} has {table.row_count} rows and " f"{table.column_count} columns")
         if table.bounding_regions:
             for region in table.bounding_regions:
-                print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                print(
+                    f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                )
         for cell in table.cells:
             print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
             if cell.bounding_regions:
                 for region in cell.bounding_regions:
-                    print(f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'")
+                    print(
+                        f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'"
+                    )
 
 print("----------------------------------------")
 ```
@@ -302,11 +319,14 @@ print("----Key-value pairs found in document----")
 if result.key_value_pairs:
     for kv_pair in result.key_value_pairs:
         if kv_pair.key:
-            print(f"Key '{kv_pair.key.content}' found within " f"'{kv_pair.key.bounding_regions}' bounding regions")
+            print(
+                f"Key '{kv_pair.key.content}' found within "
+                f"'{format_bounding_region(kv_pair.key.bounding_regions)}' bounding regions"
+            )
         if kv_pair.value:
             print(
                 f"Value '{kv_pair.value.content}' found within "
-                f"'{kv_pair.value.bounding_regions}' bounding regions\n"
+                f"'{format_bounding_region(kv_pair.value.bounding_regions)}' bounding regions\n"
             )
 
 for page in result.pages:
@@ -318,17 +338,18 @@ for page in result.pages:
             words = get_words(page.words, line)
             print(
                 f"...Line #{line_idx} has {len(words)} words and text '{line.content}' within "
-                f"bounding polygon '{line.polygon}'"
+                f"bounding polygon '{format_polygon(line.polygon)}'"
             )
 
-            for word in words:
-                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+    if page.words:
+        for word in page.words:
+            print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
     if page.selection_marks:
         for selection_mark in page.selection_marks:
             print(
                 f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                f"'{selection_mark.polygon}' and has a confidence of "
+                f"'{format_polygon(selection_mark.polygon)}' and has a confidence of "
                 f"{selection_mark.confidence}"
             )
 
@@ -337,13 +358,15 @@ if result.tables:
         print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")
         if table.bounding_regions:
             for region in table.bounding_regions:
-                print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                print(
+                    f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                )
         for cell in table.cells:
             print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
             if cell.bounding_regions:
                 for region in cell.bounding_regions:
                     print(
-                        f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'\n"
+                        f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'\n"
                     )
 print("----------------------------------------")
 ```

diff --git a/...lligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_barcodes_async.py b/...lligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_barcodes_async.py
@@ -43,6 +43,12 @@
 import os
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_barcodes():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -82,7 +88,7 @@ async def analyze_barcodes():
                 print(f"- Barcode #{barcode_idx}: {barcode.value}")
                 print(f"  Kind: {barcode.kind}")
                 print(f"  Confidence: {barcode.confidence}")
-                print(f"  Bounding regions: {barcode.polygon}")
+                print(f"  Bounding regions: {format_polygon(barcode.polygon)}")
 
     print("----------------------------------------")
     # [END analyze_barcodes]

diff --git a/...lligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_formulas_async.py b/...lligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_formulas_async.py
@@ -43,6 +43,12 @@
 import os
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_formulas():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -85,13 +91,13 @@ async def analyze_formulas():
             for formula_idx, formula in enumerate(inline_formulas):
                 print(f"- Inline #{formula_idx}: {formula.value}")
                 print(f"  Confidence: {formula.confidence}")
-                print(f"  Bounding regions: {formula.polygon}")
+                print(f"  Bounding regions: {format_polygon(formula.polygon)}")
 
             print(f"\nDetected {len(display_formulas)} display formulas.")
             for formula_idx, formula in enumerate(display_formulas):
                 print(f"- Display #{formula_idx}: {formula.value}")
                 print(f"  Confidence: {formula.confidence}")
-                print(f"  Bounding regions: {formula.polygon}")
+                print(f"  Bounding regions: {format_polygon(formula.polygon)}")
 
     print("----------------------------------------")
     # [END analyze_formulas]

diff --git a/...elligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_highres_async.py b/...elligence/azure-ai-documentintelligence/samples/aio/sample_analyze_addon_highres_async.py
@@ -58,6 +58,12 @@ def _in_span(word, spans):
     return False
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_with_highres():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -102,30 +108,35 @@ async def analyze_with_highres():
                 words = get_words(page, line)
                 print(
                     f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
-                    f"within bounding polygon '{line.polygon}'"
+                    f"within bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
     if result.tables:
         for table_idx, table in enumerate(result.tables):
             print(f"Table # {table_idx} has {table.row_count} rows and " f"{table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
-                        print(f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'")
+                        print(
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'"
+                        )
 
     print("----------------------------------------")
     # [END analyze_with_highres]

diff --git a/...gence/azure-ai-documentintelligence/samples/aio/sample_analyze_general_documents_async.py b/...gence/azure-ai-documentintelligence/samples/aio/sample_analyze_general_documents_async.py
@@ -40,6 +40,18 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_general_documents():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -79,11 +91,14 @@ async def analyze_general_documents():
     if result.key_value_pairs:
         for kv_pair in result.key_value_pairs:
             if kv_pair.key:
-                print(f"Key '{kv_pair.key.content}' found within " f"'{kv_pair.key.bounding_regions}' bounding regions")
+                print(
+                    f"Key '{kv_pair.key.content}' found within "
+                    f"'{format_bounding_region(kv_pair.key.bounding_regions)}' bounding regions"
+                )
             if kv_pair.value:
                 print(
                     f"Value '{kv_pair.value.content}' found within "
-                    f"'{kv_pair.value.bounding_regions}' bounding regions\n"
+                    f"'{format_bounding_region(kv_pair.value.bounding_regions)}' bounding regions\n"
                 )
 
     for page in result.pages:
@@ -95,17 +110,18 @@ async def analyze_general_documents():
                 words = get_words(page.words, line)
                 print(
                     f"...Line #{line_idx} has {len(words)} words and text '{line.content}' within "
-                    f"bounding polygon '{line.polygon}'"
+                    f"bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of "
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of "
                     f"{selection_mark.confidence}"
                 )
 
@@ -114,13 +130,15 @@ async def analyze_general_documents():
             print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
                         print(
-                            f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'\n"
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'\n"
                         )
     print("----------------------------------------")
     # [END analyze_general_documents]

diff --git a/...mentintelligence/azure-ai-documentintelligence/samples/aio/sample_analyze_layout_async.py b/...mentintelligence/azure-ai-documentintelligence/samples/aio/sample_analyze_layout_async.py
@@ -44,13 +44,25 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_layout():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
             os.path.abspath(__file__),
             "..",
             "..",
-            "./sample_forms/forms/form_selection_mark.png",
+            "./sample_forms/forms/tabular_and_general_data.docx",
         )
     )
 
@@ -84,30 +96,47 @@ async def analyze_layout():
                 words = get_words(page, line)
                 print(
                     f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
-                    f"within bounding polygon '{line.polygon}'"
+                    f"within bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
+    if result.paragraphs:
+        print(f"----Detected #{len(result.paragraphs)} paragraphs in the document----")
+        # Sort each paragraph's spans in place (list.sort() returns None), then order paragraphs by first span offset.
+        result.paragraphs.sort(key=lambda p: (p.spans.sort(key=lambda s: s.offset), p.spans[0].offset))
+        print("-----Print sorted paragraphs-----")
+        for paragraph in result.paragraphs:
+            print(
+                f"Found paragraph with role: '{paragraph.role}' within {format_bounding_region(paragraph.bounding_regions)} bounding region"
+            )
+            print(f"...with content: '{paragraph.content}'")
+            print(f"...with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}")
+
     if result.tables:
         for table_idx, table in enumerate(result.tables):
             print(f"Table # {table_idx} has {table.row_count} rows and " f"{table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
-                        print(f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'")
+                        print(
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'"
+                        )
 
     print("----------------------------------------")
     # [END extract_layout]

diff --git a/...cumentintelligence/azure-ai-documentintelligence/samples/aio/sample_analyze_read_async.py b/...cumentintelligence/azure-ai-documentintelligence/samples/aio/sample_analyze_read_async.py
@@ -40,6 +40,18 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 async def analyze_read():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -91,7 +103,7 @@ async def analyze_read():
             for line_idx, line in enumerate(page.lines):
                 words = get_words(page, line)
                 print(
-                    f"...Line # {line_idx} has {len(words)} words and text '{line.content}' within bounding polygon '{line.polygon}'"
+                    f"...Line # {line_idx} has {len(words)} words and text '{line.content}' within bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
                 for word in words:
@@ -101,21 +113,20 @@ async def analyze_read():
             for selection_mark in page.selection_marks:
                 print(
                     f"...Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
     if result.paragraphs:
         print(f"----Detected #{len(result.paragraphs)} paragraphs in the document----")
-        for paragraph in result.paragraphs:
-            print(f"Found paragraph with role: '{paragraph.role}' within {paragraph.bounding_regions} bounding region")
-            print(f"...with content: '{paragraph.content}'")
-
+        # Sort each paragraph's spans in place (list.sort() returns None), then order paragraphs by first span offset.
         result.paragraphs.sort(key=lambda p: (p.spans.sort(key=lambda s: s.offset), p.spans[0].offset))
         print("-----Print sorted paragraphs-----")
-        for idx, paragraph in enumerate(result.paragraphs):
+        for paragraph in result.paragraphs:
             print(
-                f"...paragraph:{idx} with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}"
+                f"Found paragraph with role: '{paragraph.role}' within {format_bounding_region(paragraph.bounding_regions)} bounding region"
             )
+            print(f"...with content: '{paragraph.content}'")
+            print(f"...with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}")
 
     print("----------------------------------------")
 

diff --git a/...cumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_barcodes.py b/...cumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_barcodes.py
@@ -42,6 +42,12 @@
 import os
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_barcodes():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -79,7 +85,7 @@ def analyze_barcodes():
                 print(f"- Barcode #{barcode_idx}: {barcode.value}")
                 print(f"  Kind: {barcode.kind}")
                 print(f"  Confidence: {barcode.confidence}")
-                print(f"  Bounding regions: {barcode.polygon}")
+                print(f"  Bounding regions: {format_polygon(barcode.polygon)}")
 
     print("----------------------------------------")
     # [END analyze_barcodes]

diff --git a/...cumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_formulas.py b/...cumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_formulas.py
@@ -42,6 +42,12 @@
 import os
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_formulas():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -82,13 +88,13 @@ def analyze_formulas():
             for formula_idx, formula in enumerate(inline_formulas):
                 print(f"- Inline #{formula_idx}: {formula.value}")
                 print(f"  Confidence: {formula.confidence}")
-                print(f"  Bounding regions: {formula.polygon}")
+                print(f"  Bounding regions: {format_polygon(formula.polygon)}")
 
             print(f"\nDetected {len(display_formulas)} display formulas.")
             for formula_idx, formula in enumerate(display_formulas):
                 print(f"- Display #{formula_idx}: {formula.value}")
                 print(f"  Confidence: {formula.confidence}")
-                print(f"  Bounding regions: {formula.polygon}")
+                print(f"  Bounding regions: {format_polygon(formula.polygon)}")
 
     print("----------------------------------------")
     # [END analyze_formulas]

diff --git a/...ocumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_highres.py b/...ocumentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_highres.py
@@ -57,6 +57,12 @@ def _in_span(word, spans):
     return False
 
 
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_with_highres():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -99,30 +105,35 @@ def analyze_with_highres():
                 words = get_words(page, line)
                 print(
                     f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
-                    f"within bounding polygon '{line.polygon}'"
+                    f"within bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
     if result.tables:
         for table_idx, table in enumerate(result.tables):
             print(f"Table # {table_idx} has {table.row_count} rows and " f"{table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
-                        print(f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'")
+                        print(
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'"
+                        )
 
     print("----------------------------------------")
     # [END analyze_with_highres]

diff --git a/...entintelligence/azure-ai-documentintelligence/samples/sample_analyze_general_documents.py b/...entintelligence/azure-ai-documentintelligence/samples/sample_analyze_general_documents.py
@@ -39,6 +39,18 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_general_documents():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -76,11 +88,14 @@ def analyze_general_documents():
     if result.key_value_pairs:
         for kv_pair in result.key_value_pairs:
             if kv_pair.key:
-                print(f"Key '{kv_pair.key.content}' found within " f"'{kv_pair.key.bounding_regions}' bounding regions")
+                print(
+                    f"Key '{kv_pair.key.content}' found within "
+                    f"'{format_bounding_region(kv_pair.key.bounding_regions)}' bounding regions"
+                )
             if kv_pair.value:
                 print(
                     f"Value '{kv_pair.value.content}' found within "
-                    f"'{kv_pair.value.bounding_regions}' bounding regions\n"
+                    f"'{format_bounding_region(kv_pair.value.bounding_regions)}' bounding regions\n"
                 )
 
     for page in result.pages:
@@ -92,17 +107,18 @@ def analyze_general_documents():
                 words = get_words(page.words, line)
                 print(
                     f"...Line #{line_idx} has {len(words)} words and text '{line.content}' within "
-                    f"bounding polygon '{line.polygon}'"
+                    f"bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of "
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of "
                     f"{selection_mark.confidence}"
                 )
 
@@ -111,13 +127,15 @@ def analyze_general_documents():
             print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
                         print(
-                            f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'\n"
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'\n"
                         )
     print("----------------------------------------")
     # [END analyze_general_documents]

diff --git a/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_layout.py b/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_layout.py
@@ -43,12 +43,24 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_layout():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
             os.path.abspath(__file__),
             "..",
-            "./sample_forms/forms/form_selection_mark.png",
+            "./sample_forms/forms/tabular_and_general_data.docx",
         )
     )
 
@@ -81,30 +93,47 @@ def analyze_layout():
                 words = get_words(page, line)
                 print(
                     f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
-                    f"within bounding polygon '{line.polygon}'"
+                    f"within bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
+    if result.paragraphs:
+        print(f"----Detected #{len(result.paragraphs)} paragraphs in the document----")
+        # Sort all paragraphs by span's offset to read in the right order.
+        result.paragraphs.sort(key=lambda p: (p.spans.sort(key=lambda s: s.offset), p.spans[0].offset))
+        print("-----Print sorted paragraphs-----")
+        for paragraph in result.paragraphs:
+            print(
+                f"Found paragraph with role: '{paragraph.role}' within {format_bounding_region(paragraph.bounding_regions)} bounding region"
+            )
+            print(f"...with content: '{paragraph.content}'")
+            print(f"...with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}")
+
     if result.tables:
         for table_idx, table in enumerate(result.tables):
             print(f"Table # {table_idx} has {table.row_count} rows and " f"{table.column_count} columns")
             if table.bounding_regions:
                 for region in table.bounding_regions:
-                    print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
+                    print(
+                        f"Table # {table_idx} location on page: {region.page_number} is {format_polygon(region.polygon)}"
+                    )
             for cell in table.cells:
                 print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
                 if cell.bounding_regions:
                     for region in cell.bounding_regions:
-                        print(f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'")
+                        print(
+                            f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'"
+                        )
 
     print("----------------------------------------")
     # [END extract_layout]

diff --git a/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_read.py b/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_read.py
@@ -39,6 +39,18 @@ def _in_span(word, spans):
     return False
 
 
+def format_bounding_region(bounding_regions):
+    if not bounding_regions:
+        return "N/A"
+    return ", ".join(f"Page #{region.page_number}: {format_polygon(region.polygon)}" for region in bounding_regions)
+
+
+def format_polygon(polygon):
+    if not polygon:
+        return "N/A"
+    return ", ".join([f"[{polygon[i]}, {polygon[i + 1]}]" for i in range(0, len(polygon), 2)])
+
+
 def analyze_read():
     path_to_sample_documents = os.path.abspath(
         os.path.join(
@@ -88,31 +100,33 @@ def analyze_read():
             for line_idx, line in enumerate(page.lines):
                 words = get_words(page, line)
                 print(
-                    f"...Line # {line_idx} has {len(words)} words and text '{line.content}' within bounding polygon '{line.polygon}'"
+                    f"...Line # {line_idx} has {len(words)} words and text '{line.content}' within "
+                    f"bounding polygon '{format_polygon(line.polygon)}'"
                 )
 
-                for word in words:
-                    print(f"......Word '{word.content}' has a confidence of {word.confidence}")
+        if page.words:
+            for word in page.words:
+                print(f"......Word '{word.content}' has a confidence of {word.confidence}")
 
         if page.selection_marks:
             for selection_mark in page.selection_marks:
                 print(
                     f"...Selection mark is '{selection_mark.state}' within bounding polygon "
-                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
+                    f"'{format_polygon(selection_mark.polygon)}' and has a confidence of {selection_mark.confidence}"
                 )
 
     if result.paragraphs:
         print(f"----Detected #{len(result.paragraphs)} paragraphs in the document----")
-        for paragraph in result.paragraphs:
-            print(f"Found paragraph with role: '{paragraph.role}' within {paragraph.bounding_regions} bounding region")
-            print(f"...with content: '{paragraph.content}'")
-
+        # Sort all paragraphs by span's offset to read in the right order.
         result.paragraphs.sort(key=lambda p: (p.spans.sort(key=lambda s: s.offset), p.spans[0].offset))
         print("-----Print sorted paragraphs-----")
-        for idx, paragraph in enumerate(result.paragraphs):
+        for paragraph in result.paragraphs:
             print(
-                f"...paragraph:{idx} with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}"
+                f"Found paragraph with role: '{paragraph.role}' within "
+                f"{format_bounding_region(paragraph.bounding_regions)} bounding regions"
             )
+            print(f"...with content: '{paragraph.content}'")
+            print(f"...with offset: {paragraph.spans[0].offset} and length: {paragraph.spans[0].length}")
 
     print("----------------------------------------")
 

diff --git a/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_forms/forms/tabular_and_general_data.docx b/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_forms/forms/tabular_and_general_data.docx