From 086fbd57ea0cff0aaf9e8c338eef84f4efcde0c7 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 20 Sep 2024 15:55:30 +0200 Subject: [PATCH 1/8] updated the render_as_doctags with the new arguments from docling-core Signed-off-by: Peter Staar --- docling/datamodel/document.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index b8177730..5904df14 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -368,20 +368,28 @@ def render_as_doctags( "table", "figure", ], - page_tagging: bool = True, - location_tagging: bool = True, - location_dimensions: Tuple[int, int] = (100, 100), - add_new_line: bool = True, + xsize: int = 100, + ysize: int = 100, + add_location: bool = True, + add_content: bool = True, + add_page_index: bool = True, + # table specific flags + add_table_cell_location: bool = False, + add_table_cell_label: bool = True, + add_table_cell_text: bool = True, ) -> str: return self.output.export_to_document_tokens( delim=delim, main_text_start=main_text_start, main_text_stop=main_text_stop, main_text_labels=main_text_labels, - page_tagging=page_tagging, - location_tagging=location_tagging, - location_dimensions=location_dimensions, - add_new_line=add_new_line, + add_location=add_location, + add_content=add_content, + add_page_index=add_page_index, + # table specific flags + add_table_cell_location=add_table_cell_location, + add_table_cell_label=add_table_cell_labe + add_table_cell_text=add_table_cell_text ) def render_element_images( From a44c4c08724a7e8667ae5e0e101746f7b3624d93 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sat, 21 Sep 2024 06:06:26 +0200 Subject: [PATCH 2/8] ensuring that docling-core is >1.5.0 to accomodate with the latest export-to-doctags parameters Signed-off-by: Peter Staar --- poetry.lock | 16 ++++------------ pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git 
a/poetry.lock b/poetry.lock index 075a5220..2f9bb4cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -863,26 +863,18 @@ files = [ {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:c69e055b98d0a22267a1d0b6139801aecc5b7386289b89f53f976ab723352728"}, {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3eaa245e5ac4ab3e9d0c95a93e23f58d61d70f11431b76b6705fae358eb31c62"}, {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:63d195f6c5b30f4f908436589cffd4a5b9e18553c44c57fb635068a2afbd7fab"}, - {file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c9296a2e417a30bf030de0c7c2e2cce4773c58bead039d5e6fccbf7deb2269"}, - {file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:166b9958d3a8a98d0671a1e3fdf8083ded9ccf12c2ab80fb9709908a2cf81784"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:13bea2b4e8c04647ec743c3feb1ee66c784db542ab9dbed8dad7eb66fca74b70"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:c5b8b8e2207615ff99e535f00548c7b0b8e4ca4593e59edd83fcad98fc318284"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ba74868243caf5ac850fff7c45c8a372c1cac0193431e22eb41888d45ac79719"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7815b06aa1c3953488496f191ce0265d0ee7bed5a6b96454a5f9d6f1add28f69"}, - {file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a7dd2a1e63cee47f6090ebfebc15f68d24f61d5f4f45a21f22120b2267798d"}, - {file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52bd2934a27fdc9db5f2d0713dbeec0c94e5c5843d29996e85d641a11498ad0"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_arm64.whl", hash = 
"sha256:fd4d0d4ff853e566b05769c704a4ea3c050c0cfc5721e4e2035e550fb2a8fe91"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:802a59a8a3bea1801bce848d58d19fcdbbcea27d9e2c23f163419d13cdec2345"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1ead7958bc044000a8d43cce53c9b82be0d341b0ca5cf7b39a0c09f9c4fd8ceb"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:312cf2b0b6560c8dfe5331a5a80a0ed5cb409d29ee6cc999a81696774d50f5e7"}, - {file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc3d6f6ca2cffbe5e112818c8aba9a783af8ab7cffff04624bfb5bf8d185b707"}, - {file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc537d5e9d108233b7e7249c6739292dc9c36a0f39c11e7f430700df35ff884"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:4db0a700c08ff2d6285461dc5f4a68ccd36876a59b62131f847dc4be76a85989"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:f1041c44d1a4d1a43a324781795b03edfdfd8076c49a610c4dd384c86f2a6236"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:efb0e9678fe07640bd9b6dc07651eaf1f8e5d5602e379b4cf78dbcddc62b50e9"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f8d46922d74339ec7fd7a6933220ebc36b2ff39738ad9bb74ea55a198dd31b2f"}, - {file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2872de101ce6d262f57afd3f4d68452064c214c5ab001b7ac698a948e0725314"}, - {file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187da7dabc11317badbf6983ee508c367299eb39ed78938623206be6b21e41bd"}, ] [package.dependencies] @@ -957,13 +949,13 @@ files = [ [[package]] name = "docling-core" -version = "1.4.0" +version = "1.5.0" description = "A python library to 
define and validate data types in Docling." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_core-1.4.0-py3-none-any.whl", hash = "sha256:11cd6228d5f321fd11427cf61f40148afd544170e82236228794300f14f8a15a"}, - {file = "docling_core-1.4.0.tar.gz", hash = "sha256:6ea151974172a87a9bca0d63787dc16bdb4170ecb73f18e61e3c2e95eb3fe3d8"}, + {file = "docling_core-1.5.0-py3-none-any.whl", hash = "sha256:1a8bb4940ecbf98c6381298f3ad121d95aa8895883150a5dd113a348a0987d09"}, + {file = "docling_core-1.5.0.tar.gz", hash = "sha256:bc8ddbae16e2b740225f37758125eb95b9fcd4202542c4547a9683a7ad423e10"}, ] [package.dependencies] @@ -7257,4 +7249,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7dc789b3c981898fdabec03f85ebb92273f2bb55b2bf1e18dad1d4c361c6b97b" +content-hash = "7ee1e9e99c23e075fb1f8722e4fc9e6c0b02a4282f4e67ebbcd75598720536b7" diff --git a/pyproject.toml b/pyproject.toml index cd20fb64..81b37044 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ packages = [{include = "docling"}] [tool.poetry.dependencies] python = "^3.10" pydantic = "^2.0.0" -docling-core = "^1.4.0" +docling-core = "^1.5.0" docling-ibm-models = "^1.2.0" deepsearch-glm = "^0.21.1" filetype = "^1.2.0" From cb3d23b294c7f48cfab4859dc3bb0d40b47bd5b6 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sat, 21 Sep 2024 06:26:23 +0200 Subject: [PATCH 3/8] added the doctags tests Signed-off-by: Peter Staar --- docling/datamodel/document.py | 2 +- tests/data/2203.01017v2.doctags.txt | 351 +++++ tests/data/2203.01017v2.json | 2 +- tests/data/2206.01062.doctags.txt | 237 +++ tests/data/2206.01062.json | 2 +- tests/data/2305.03393v1-pg9.doctags.txt | 20 + tests/data/2305.03393v1-pg9.json | 2 +- tests/data/2305.03393v1.doctags.txt | 149 ++ tests/data/2305.03393v1.json | 2 +- tests/data/redp5110.doctags.txt | 1843 +++++++++++++++++++++++ tests/data/redp5110.json | 2 +- 
tests/data/redp5695.doctags.txt | 443 ++++++ tests/data/redp5695.json | 2 +- tests/verify_utils.py | 16 + 14 files changed, 3066 insertions(+), 7 deletions(-) create mode 100644 tests/data/2203.01017v2.doctags.txt create mode 100644 tests/data/2206.01062.doctags.txt create mode 100644 tests/data/2305.03393v1-pg9.doctags.txt create mode 100644 tests/data/2305.03393v1.doctags.txt create mode 100644 tests/data/redp5110.doctags.txt create mode 100644 tests/data/redp5695.doctags.txt diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 5904df14..fe0507b8 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -388,7 +388,7 @@ def render_as_doctags( add_page_index=add_page_index, # table specific flags add_table_cell_location=add_table_cell_location, - add_table_cell_label=add_table_cell_labe + add_table_cell_label=add_table_cell_label, add_table_cell_text=add_table_cell_text ) diff --git a/tests/data/2203.01017v2.doctags.txt b/tests/data/2203.01017v2.doctags.txt new file mode 100644 index 00000000..db8f9674 --- /dev/null +++ b/tests/data/2203.01017v2.doctags.txt @@ -0,0 +1,351 @@ + +TableFormer: Table Structure Understanding with Transformers. +Ahmed Nassar, Nikolaos Livathinos, Maksym Lysak, Peter Staar IBM Research +{ ahn,nli,mly,taa } @zurich.ibm.com +Abstract +a. Picture of a table: +1. Introduction +The occurrence of tables in documents is ubiquitous. They often summarise quantitative or factual data, which is cumbersome to describe in verbose text but nevertheless extremely valuable. Unfortunately, this compact representation is often not easy to parse by machines. There are many implicit conventions used to obtain a compact table representation. For example, tables often have complex columnand row-headers in order to reduce duplicated cell content. Lines of different shapes and sizes are leveraged to separate content or indicate a tree structure. 
Additionally, tables can also have empty/missing table-entries or multi-row textual table-entries. Fig. 1 shows a table which presents all these issues. +Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graph's, etc, since they enhance their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, different variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. In this paper, we present a new table-structure identification model. The latter improves the latest end-toend deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDF's directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-english tables. Second, we replace the LSTM decoders with transformer based decoders. This upgrade improves significantly the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables. + + + +31 +2 +
Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graphs, etc., since it enhances their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, a variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. In this paper, we present a new table-structure identification model. The latter improves the latest end-to-end deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDFs directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-English tables. Second, we replace the LSTM decoders with transformer-based decoders. This upgrade significantly improves the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables.
+b. Red-annotation of bounding boxes, Blue-predictions by TableFormer +
+ +
+c. Structure predicted by TableFormer: +Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cell with multi-row text and (3) cells with no content. Image from PubTabNet evaluation set, filename: 'PMC2944238 004 02'. + + + +0112 12 1 +345 367 +891011122 +131415162 +171819202 +
Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cells with multi-row text and (3) cells with no content. Image from PubTabNet evaluation set, filename: 'PMC2944238_004_02'.
+Recently, significant progress has been made with vision based approaches to extract tables in documents. For the sake of completeness, the issue of table extraction from documents is typically decomposed into two separate challenges, i.e. (1) finding the location of the table(s) on a document-page and (2) finding the structure of a given table in the document. +The first problem is called table-location and has been previously addressed [30, 38, 19, 21, 23, 26, 8] with stateof-the-art object-detection networks (e.g. YOLO and later on Mask-RCNN [9]). For all practical purposes, it can be +considered as a solved problem, given enough ground-truth data to train on. +The second problem is called table-structure decomposition. The latter is a long standing problem in the community of document understanding [6, 4, 14]. Contrary to the table-location problem, there are no commonly used approaches that can easily be re-purposed to solve this problem. Lately, a set of new model-architectures has been proposed by the community to address table-structure decomposition [37, 36, 18, 20]. All these models have some weaknesses (see Sec. 2). The common denominator here is the reliance on textual features and/or the inability to provide the bounding box of each table-cell in the original image. +In this paper, we want to address these weaknesses and present a robust table-structure decomposition algorithm. The design criteria for our model are the following. First, we want our algorithm to be language agnostic. In this way, we can obtain the structure of any table, irregardless of the language. Second, we want our algorithm to leverage as much data as possible from the original PDF document. For programmatic PDF documents, the text-cells can often be extracted much faster and with higher accuracy compared to OCR methods. Last but not least, we want to have a direct link between the table-cell and its bounding box in the image. 
+To meet the design criteria listed above, we developed a new model called TableFormer and a synthetically generated table structure dataset called SynthTabNet $^{1}$. In particular, our contributions in this work can be summarised as follows: +· We propose TableFormer , a transformer based model that predicts tables structure and bounding boxes for the table content simultaneously in an end-to-end approach. +· Across all benchmark datasets TableFormer significantly outperforms existing state-of-the-art metrics, while being much more efficient in training and inference to existing works. +· We present SynthTabNet a synthetically generated dataset, with various appearance styles and complexity. +· An augmented dataset based on PubTabNet [37], FinTabNet [36], and TableBank [17] with generated ground-truth for reproducibility. +The paper is structured as follows. In Sec. 2, we give a brief overview of the current state-of-the-art. In Sec. 3, we describe the datasets on which we train. In Sec. 4, we introduce the TableFormer model-architecture and describe +its results & performance in Sec. 5. As a conclusion, we describe how this new model-architecture can be re-purposed for other tasks in the computer-vision community. +2. Previous work and State of the Art +Identifying the structure of a table has been an outstanding problem in the document-parsing community, that motivates many organised public challenges [6, 4, 14]. The difficulty of the problem can be attributed to a number of factors. First, there is a large variety in the shapes and sizes of tables. Such large variety requires a flexible method. This is especially true for complex column- and row headers, which can be extremely intricate and demanding. A second factor of complexity is the lack of data with regard to table-structure. Until the publication of PubTabNet [37], there were no large datasets (i.e. > 100 K tables) that provided structure information. 
This happens primarily due to the fact that tables are notoriously time-consuming to annotate by hand. However, this has definitely changed in recent years with the deliverance of PubTabNet [37], FinTabNet [36], TableBank [17] etc. +Before the rising popularity of deep neural networks, the community relied heavily on heuristic and/or statistical methods to do table structure identification [3, 7, 11, 5, 13, 28]. Although such methods work well on constrained tables [12], a more data-driven approach can be applied due to the advent of convolutional neural networks (CNNs) and the availability of large datasets. To the best-of-our knowledge, there are currently two different types of network architecture that are being pursued for state-of-the-art tablestructure identification. +Image-to-Text networks : In this type of network, one predicts a sequence of tokens starting from an encoded image. Such sequences of tokens can be HTML table tags [37, 17] or LaTeX symbols[10]. The choice of symbols is ultimately not very important, since one can be transformed into the other. There are however subtle variations in the Image-to-Text networks. The easiest network architectures are "image-encoder → text-decoder" (IETD), similar to network architectures that try to provide captions to images [32]. In these IETD networks, one expects as output the LaTeX/HTML string of the entire table, i.e. the symbols necessary for creating the table with the content of the table. Another approach is the "image-encoder → dual decoder" (IEDD) networks. In these type of networks, one has two consecutive decoders with different purposes. The first decoder is the tag-decoder , i.e. it only produces the HTML/LaTeX tags which construct an empty table. The second content-decoder uses the encoding of the image in combination with the output encoding of each cell-tag (from the tag-decoder ) to generate the textual content of each table cell. 
The network architecture of IEDD is certainly more elaborate, but it has the advantage that one can pre-train the +tag-decoder which is constrained to the table-tags. +In practice, both network architectures (IETD and IEDD) require an implicit, custom trained object-characterrecognition (OCR) to obtain the content of the table-cells. In the case of IETD, this OCR engine is implicit in the decoder similar to [24]. For the IEDD, the OCR is solely embedded in the content-decoder. This reliance on a custom, implicit OCR decoder is of course problematic. OCR is a well known and extremely tough problem, that often needs custom training for each individual language. However, the limited availability for non-english content in the current datasets, makes it impractical to apply the IETD and IEDD methods on tables with other languages. Additionally, OCR can be completely omitted if the tables originate from programmatic PDF documents with known positions of each cell. The latter was the inspiration for the work of this paper. +Graph Neural networks : Graph Neural networks (GNN's) take a radically different approach to tablestructure extraction. Note that one table cell can constitute out of multiple text-cells. To obtain the table-structure, one creates an initial graph, where each of the text-cells becomes a node in the graph similar to [33, 34, 2]. Each node is then associated with en embedding vector coming from the encoded image, its coordinates and the encoded text. Furthermore, nodes that represent adjacent text-cells are linked. Graph Convolutional Networks (GCN's) based methods take the image as an input, but also the position of the text-cells and their content [18]. The purpose of a GCN is to transform the input graph into a new graph, which replaces the old links with new ones. The new links then represent the table-structure. With this approach, one can avoid the need to build custom OCR decoders. 
However, the quality of the reconstructed structure is not comparable to the current state-of-the-art [18]. +Hybrid Deep Learning-Rule-Based approach : A popular current model for table-structure identification is the use of a hybrid Deep Learning-Rule-Based approach similar to [27, 29]. In this approach, one first detects the position of the table-cells with object detection (e.g. YoloVx or MaskRCNN), then classifies the table into different types (from its images) and finally uses different rule-sets to obtain its table-structure. Currently, this approach achieves stateof-the-art results, but is not an end-to-end deep-learning method. As such, new rules need to be written if different types of tables are encountered. +3. Datasets +We rely on large-scale datasets such as PubTabNet [37], FinTabNet [36], and TableBank [17] datasets to train and evaluate our models. These datasets span over various appearance styles and content. We also introduce our own synthetically generated SynthTabNet dataset to fix an im- +Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets +
+ +Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets +
+balance in the previous datasets. +The PubTabNet dataset contains 509k tables delivered as annotated PNG images. The annotations consist of the table structure represented in HTML format, the tokenized text and its bounding boxes per table cell. Fig. 1 shows the appearance style of PubTabNet. Depending on its complexity, a table is characterized as "simple" when it does not contain row spans or column spans, otherwise it is "complex". The dataset is divided into Train and Val splits (roughly 98% and 2%). The Train split consists of 54% simple and 46% complex tables and the Val split of 51% and 49% respectively. The FinTabNet dataset contains 112k tables delivered as single-page PDF documents with mixed table structures and text content. Similarly to PubTabNet, the annotations of FinTabNet include the table structure in HTML, the tokenized text and the bounding boxes on a table cell basis. The dataset is divided into Train, Test and Val splits (81%, 9.5%, 9.5%), and each one is almost equally divided into simple and complex tables (Train: 48% simple, 52% complex, Test: 48% simple, 52% complex, Val: 53% simple, 47% complex). Finally, the TableBank dataset consists of 145k tables provided as JPEG images. The latter has annotations for the table structure, but only a few with bounding boxes of the table cells. The entire dataset consists of simple tables and it is divided into 90% Train, 3% Test and 7% Val splits. +Due to the heterogeneity across the dataset formats, it was necessary to combine all available data into one homogenized dataset before we could train our models for practical purposes. Given the size of PubTabNet, we adopted its annotation format and we extracted and converted all tables as PNG images with a resolution of 72 dpi. Additionally, we have filtered out tables with extreme sizes due to the small +amount of such tables, and kept only those ranging between 1*1 and 20*10 (rows/columns).
+The availability of the bounding boxes for all table cells is essential to train our models. In order to distinguish between empty and non-empty bounding boxes, we have introduced a binary class in the annotation. Unfortunately, the original datasets either omit the bounding boxes for whole tables (e.g. TableBank) or they narrow their scope only to non-empty cells. Therefore, it was imperative to introduce a data pre-processing procedure that generates the missing bounding boxes out of the annotation information. This procedure first parses the provided table structure and calculates the dimensions of the most fine-grained grid that covers the table structure. Notice that each table cell may occupy multiple grid squares due to row or column spans. In case of PubTabNet we had to compute missing bounding boxes for 48% of the simple and 69% of the complex tables. Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes. +As it is illustrated in Fig. 2, the table distributions from all datasets are skewed towards simpler structures with fewer number of rows/columns. Additionally, there is very limited variance in the table styles, which in case of PubTabNet and FinTabNet means one styling format for the majority of the tables. Similar limitations appear also in the type of table content, which in some cases (e.g. FinTabNet) is restricted to a certain domain. Ultimately, the lack of diversity in the training dataset damages the ability of the models to generalize well on unseen data. +Motivated by those observations we aimed at generating a synthetic table dataset named SynthTabNet . This approach offers control over: 1) the size of the dataset, 2) the table structure, 3) the table style and 4) the type of content. The complexity of the table structure is described by the size of the table header and the table body, as well as the percentage of the table cells covered by row spans and column spans. 
A set of carefully designed styling templates provides the basis to build a wide range of table appearances. Lastly, the table content is generated out of a curated collection of text corpora. By controlling the size and scope of the synthetic datasets we are able to train and evaluate our models in a variety of different conditions. For example, we can first generate a highly diverse dataset to train our models and then evaluate their performance on other synthetic datasets which are focused on a specific domain. +In this regard, we have prepared four synthetic datasets, each one containing 150k examples. The corpora to generate the table text consists of the most frequent terms appearing in PubTabNet and FinTabNet together with randomly generated text. The first two synthetic datasets have been fine-tuned to mimic the appearance of the original datasets but encompass more complicated table structures. The third +Table 1: Both "Combined-Tabnet" and "CombinedTabnet" are variations of the following: (*) The CombinedTabnet dataset is the processed combination of PubTabNet and Fintabnet. (**) The combined dataset is the processed combination of PubTabNet, Fintabnet and TableBank. + + + +TagsBboxSizeFormat +PubTabNet33509kPNG +FinTabNet33112kPDF +TableBank37145kJPEG +Combined-Tabnet(*)33400kPNG +Combined(**)33500kPNG +SynthTabNet33600kPNG +
Table 1: Both "Combined-Tabnet" and "Combined" are variations of the following: (*) The Combined-Tabnet dataset is the processed combination of PubTabNet and FinTabNet. (**) The Combined dataset is the processed combination of PubTabNet, FinTabNet and TableBank.
+one adopts a colorful appearance with high contrast and the last one contains tables with sparse content. Lastly, we have combined all synthetic datasets into one big unified synthetic dataset of 600k examples. +Tab. 1 summarizes the various attributes of the datasets. +4. The TableFormer model +Given the image of a table, TableFormer is able to predict: 1) a sequence of tokens that represent the structure of a table, and 2) a bounding box coupled to a subset of those tokens. The conversion of an image into a sequence of tokens is a well-known task [35, 16]. While attention is often used as an implicit method to associate each token of the sequence with a position in the original image, an explicit association between the individual table-cells and the image bounding boxes is also required. +4.1. Model architecture. +We now describe in detail the proposed method, which is composed of three main components, see Fig. 4. Our CNN Backbone Network encodes the input as a feature vector of predefined length. The input feature vector of the encoded image is passed to the Structure Decoder to produce a sequence of HTML tags that represent the structure of the table. With each prediction of an HTML standard data cell (' < td > ') the hidden state of that cell is passed to the Cell BBox Decoder. As for spanning cells, such as row or column span, the tag is broken down to ' < ', 'rowspan=' or 'colspan=', with the number of spanning cells (attribute), and ' > '. The hidden state attached to ' < ' is passed to the Cell BBox Decoder. A shared feed forward network (FFN) receives the hidden states from the Structure Decoder, to provide the final detection predictions of the bounding box coordinates and their classification. +CNN Backbone Network. A ResNet-18 CNN is the backbone that receives the table image and encodes it as a vector of predefined length. 
The network has been modified by removing the linear and pooling layer, as we are not per- +Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. The bounding boxes grabs the content from the PDF and inserts it in the structure. +
+ +Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. The bounding boxes grabs the content from the PDF and inserts it in the structure. +
+Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes. +
+ +Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes. +
+forming classification, and adding an adaptive pooling layer of size 28*28. ResNet by default downsamples the image resolution by 32 and then the encoded image is provided to both the Structure Decoder , and Cell BBox Decoder . +Structure Decoder. The transformer architecture of this component is based on the work proposed in [31]. After extensive experimentation, the Structure Decoder is modeled as a transformer encoder with two encoder layers and a transformer decoder made from a stack of 4 decoder layers that comprise mainly of multi-head attention and feed forward layers. This configuration uses fewer layers and heads in comparison to networks applied to other problems (e.g. "Scene Understanding", "Image Captioning"), something which we relate to the simplicity of table images. +The transformer encoder receives an encoded image from the CNN Backbone Network and refines it through a multi-head dot-product attention layer, followed by a Feed Forward Network. During training, the transformer decoder receives as input the output feature produced by the transformer encoder, and the tokenized input of the HTML ground-truth tags. Using a stack of multi-head attention layers, different aspects of the tag sequence could be inferred. This is achieved by each attention head on a layer operating in a different subspace, and then combining altogether their attention score. +Cell BBox Decoder. Our architecture allows to simultaneously predict HTML tags and bounding boxes for each table cell without the need of a separate object detector end to end. This approach is inspired by DETR [1] which employs a Transformer Encoder, and Decoder that looks for a specific number of object queries (potential object detections). As our model utilizes a transformer architecture, the hidden state of the < td > ' and ' < ' HTML structure tags become the object query. 
+The encoding generated by the CNN Backbone Network along with the features acquired for every data cell from the Transformer Decoder are then passed to the attention network. The attention network takes both inputs and learns to provide an attention weighted encoding. This weighted +attention encoding is then multiplied to the encoded image to produce a feature for each table cell. Notice that this is different than the typical object detection problem where imbalances between the number of detections and the amount of objects may exist. In our case, we know up front that the produced detections always match with the table cells in number and correspondence. +The output features for each table cell are then fed into the feed-forward network (FFN). The FFN consists of a Multi-Layer Perceptron (3 layers with ReLU activation function) that predicts the normalized coordinates for the bounding box of each table cell. Finally, the predicted bounding boxes are classified based on whether they are empty or not using a linear layer. +Loss Functions. We formulate a multi-task loss Eq. 2 to train our network. The Cross-Entropy loss (denoted as l$_{s}$ ) is used to train the Structure Decoder which predicts the structure tokens. As for the Cell BBox Decoder it is trained with a combination of losses denoted as l$_{box}$ . l$_{box}$ consists of the generally used l$_{1}$ loss for object detection and the IoU loss ( l$_{iou}$ ) to be scale invariant as explained in [25]. In comparison to DETR, we do not use the Hungarian algorithm [15] to match the predicted bounding boxes with the ground-truth boxes, as we have already achieved a one-to-one match through two steps: 1) Our token input sequence is naturally ordered, therefore the hidden states of the table data cells are also in order when they are provided as input to the Cell BBox Decoder , and 2) Our bounding boxes generation mechanism (see Sec. 
3) ensures a one-to-one mapping between the cell content and its bounding box for all post-processed datasets. +The loss used to train the TableFormer can be defined as follows: $l_{box} = \lambda_{iou} l_{iou} + \lambda_{l_1} l_{1}$ and $l = \lambda l_{s} + (1 - \lambda) l_{box}$, +where λ ∈ [0, 1], and λ$_{iou}$, λ$_{l_1}$ ∈ ℝ are hyper-parameters. +5. Experimental Results +5.1. Implementation Details +TableFormer uses ResNet-18 as the CNN Backbone Network . The input images are resized to 448*448 pixels and the feature map has a dimension of 28*28. Additionally, we enforce the following input constraints: +Although input constraints are used also by other methods, such as EDD, ours are less restrictive due to the improved +runtime performance and lower memory footprint of TableFormer. This allows to utilize input samples with longer sequences and images with larger dimensions. +The Transformer Encoder consists of two "Transformer Encoder Layers", with an input feature size of 512, feed forward network of 1024, and 4 attention heads. As for the Transformer Decoder it is composed of four "Transformer Decoder Layers" with similar input and output dimensions as the "Transformer Encoder Layers". Even though our model uses fewer layers and heads than the default implementation parameters, our extensive experimentation has proved this setup to be more suitable for table images. We attribute this finding to the inherent design of table images, which contain mostly lines and text, unlike the more elaborate content present in other scopes (e.g. the COCO dataset). Moreover, we have added ResNet blocks to the inputs of the Structure Decoder and Cell BBox Decoder. This prevents a decoder having a stronger influence over the learned weights which would damage the other prediction task (structure vs bounding boxes), but learn task specific weights instead. Lastly our dropout layers are set to 0.5. 
+For training, TableFormer is trained with 3 Adam optimizers, each one for the CNN Backbone Network , Structure Decoder , and Cell BBox Decoder . 
Taking the PubTabNet as an example for our parameter set up, the initializing learning rate is 0.001 for 12 epochs with a batch size of 24, and λ set to 0.5. Afterwards, we reduce the learning rate to 0.0001, the batch size to 18 and train for 12 more epochs or until convergence. +TableFormer is implemented with PyTorch and Torchvision libraries [22]. To speed up the inference, the image undergoes a single forward pass through the CNN Backbone Network and transformer encoder. This eliminates the overhead of generating the same features for each decoding step. Similarly, we employ a 'caching' technique to perform faster autoregressive decoding. This is achieved by storing the features of decoded tokens so we can reuse them for each time step. Therefore, we only compute the attention for each new tag. +5.2. Generalization +TableFormer is evaluated on three major publicly available datasets of different nature to prove the generalization and effectiveness of our model. The datasets used for evaluation are the PubTabNet, FinTabNet and TableBank which stem from the scientific, financial and general domains respectively. +We also share our baseline results on the challenging SynthTabNet dataset. Throughout our experiments, the same parameters stated in Sec. 5.1 are utilized. +5.3. Datasets and Metrics +The Tree-Edit-Distance-Based Similarity (TEDS) metric was introduced in [37]. It represents the prediction, and ground-truth as a tree structure of HTML tags. This similarity is calculated as: $TEDS(T_{a}, T_{b}) = 1 - \frac{EditDist(T_{a}, T_{b})}{\max(|T_{a}|, |T_{b}|)}$, +where T$_{a}$ and T$_{b}$ represent tables in tree structure HTML format. EditDist denotes the tree-edit distance, and | T | represents the number of nodes in T . +5.4. Quantitative Analysis +Structure. As shown in Tab. 2, TableFormer outperforms all SOTA methods across different datasets by a large margin for predicting the table structure from an image. All the more, our model outperforms pre-trained methods. 
During the evaluation we do not apply any table filtering. 
We also provide our baseline results on the SynthTabNet dataset. It has been observed that large tables (e.g. tables that occupy half of the page or more) yield poor predictions. We attribute this issue to the image resizing during the preprocessing step, that produces downsampled images with indistinguishable features. This problem can be addressed by treating such big tables with a separate model which accepts a large input image size. +Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN). + + + +ModelDatasetSimpleTEDS ComplexAll +EDDPTN91.188.789.9 +GTEPTN--93.01 +TableFormerPTN98.595.096.75 +EDDFTN88.492.0890.6 +GTEFTN--87.14 +GTE (FT)FTN--91.02 +TableFormerFTN97.596.096.8 +EDDTB86.0-86.0 +TableFormerTB89.6-89.6 +TableFormerSTN96.995.796.7 +
Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN).
+FT: Model was trained on PubTabNet then finetuned. +Cell Detection. Like any object detector, our Cell BBox Detector provides bounding boxes that can be improved with post-processing during inference. We make use of the grid-like structure of tables to refine the predictions. A detailed explanation on the post-processing is available in the supplementary material. As shown in Tab. 3, we evaluate +our Cell BBox Decoder accuracy for cells with a class label of 'content' only using the PASCAL VOC mAP metric for pre-processing and post-processing. Note that we do not have post-processing results for SynthTabNet as images are only provided. To compare the performance of our proposed approach, we've integrated TableFormer's Cell BBox Decoder into EDD architecture. As mentioned previously, the Structure Decoder provides the Cell BBox Decoder with the features needed to predict the bounding box predictions. Therefore, the accuracy of the Structure Decoder directly influences the accuracy of the Cell BBox Decoder . If the Structure Decoder predicts an extra column, this will result in an extra column of predicted bounding boxes. +Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing. + + + +ModelDatasetmAPmAP (PP) +EDD+BBoxPubTabNet79.282.7 +TableFormerPubTabNet82.186.8 +TableFormerSynthTabNet87.7- +
Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing.
+Cell Content. In this section, we evaluate the entire pipeline of recovering a table with content. Here we put our approach to test by capitalizing on extracting content from the PDF cells rather than decoding from images. Tab. 4 shows the TEDs score of HTML code representing the structure of the table along with the content inserted in the data cell and compared with the ground-truth. Our method achieved a 5.3% increase over the state-of-the-art, and commercial solutions. We believe our scores would be higher if the HTML ground-truth matched the extracted PDF cell content. Unfortunately, there are small discrepancies such as spacings around words or special characters with various unicode representations. +Table 4: Results of structure with content retrieved using cell detection on PubTabNet. In all cases the input is PDF documents with cropped tables. + + + +ModelSimpleTEDS ComplexAll +Tabula78.057.867.9 +Traprange60.849.955.4 +Camelot80.066.073.0 +Acrobat Pro68.961.865.3 +EDD91.285.488.3 +TableFormer95.490.193.6 +
Table 4: Results of structure with content retrieved using cell detection on PubTabNet. In all cases the input is PDF documents with cropped tables.
+a. Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells +Japanese language (previously unseen by TableFormer): +Example table from FinTabNet: +
+ +
+
+ +
+b. Structure predicted by TableFormer, with superimposed matched PDF cell text: + + +論文ファイル論文ファイル参考文献参考文献 +出典ファイル 数英語日本語英語日本語 +Association for Computational Linguistics(ACL2003)656501500 +Computational Linguistics(COLING2002)14014001500 +電気情報通信学会 2003 年総合大会1508142223147 +情報処理学会第 65 回全国大会 (2003)1771176150236 +第 17 回人工知能学会全国大会 (2003)2085203152244 +自然言語処理研究会第 146 〜 155 回98296150232 +WWW から収集した論文107733414796 +9452946511122955 +
+Text is aligned to match original for ease of viewing + + + +Shares (in millions)Shares (in millions)Weighted Average Grant Date Fair ValueWeighted Average Grant Date Fair Value +RS U sPSUsRSUsPSUs +Nonvested on Janua ry 11. 10.390.10 $$ 91.19 +Granted0. 50.1117.44122.41 +Vested(0. 5 )(0.1)87.0881.14 +Canceled or forfeited(0. 1 )-102.0192.18 +Nonvested on December 311.00.3104.85 $$ 104.51 +
Text is aligned to match original for ease of viewing
+Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset. +
+ +Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset. +
+
+ +
+Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table. +
+ +Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table. +
+5.5. Qualitative Analysis +We showcase several visualizations for the different components of our network on various "complex" tables within datasets presented in this work in Fig. 5 and Fig. 6 As it is shown, our model is able to predict bounding boxes for all table cells, even for the empty ones. Additionally, our post-processing techniques can extract the cell content by matching the predicted bounding boxes to the PDF cells based on their overlap and spatial proximity. The left part of Fig. 5 demonstrates also the adaptability of our method to any language, as it can successfully extract Japanese text, although the training set contains only English content. We provide more visualizations including the intermediate steps in the supplementary material. Overall these illustrations justify the versatility of our method across a diverse range of table appearances and content type. +6. Future Work & Conclusion +In this paper, we presented TableFormer an end-to-end transformer based approach to predict table structures and bounding boxes of cells from an image. This approach enables us to recreate the table structure, and extract the cell content from PDF or OCR by using bounding boxes. Additionally, it provides the versatility required in real-world scenarios when dealing with various types of PDF documents, and languages. Furthermore, our method outperforms all state-of-the-arts with a wide margin. Finally, we introduce "SynthTabNet" a challenging synthetically generated dataset that reinforces missing characteristics from other datasets. +References +[1] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to- +end object detection with transformers. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision - ECCV 2020 , pages 213-229, Cham, 2020. Springer International Publishing. 5 +[2] Zewen Chi, Heyan Huang, Heng-Da Xu, Houjin Yu, Wanxuan Yin, and Xian-Ling Mao. 
Complicated table structure recognition. arXiv preprint arXiv:1908.04729 , 2019. 3 +[3] Bertrand Couasnon and Aurelie Lemaitre. Recognition of Tables and Forms , pages 647-677. Springer London, London, 2014. 2 +[4] Herv'e D'ejean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), Apr. 2019. http://sac.founderit.com/. 2 +[5] Basilios Gatos, Dimitrios Danatsas, Ioannis Pratikakis, and Stavros J Perantonis. Automatic table detection in document images. In International Conference on Pattern Recognition and Image Analysis , pages 609-618. Springer, 2005. 2 +[6] Max Gobel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013. 2 +[7] EA Green and M Krishnamoorthy. Recognition of tables using table grammars. procs. In Symposium on Document Analysis and Recognition (SDAIR'95) , pages 261-277. 2 +[8] Khurram Azeem Hashmi, Alain Pagani, Marcus Liwicki, Didier Stricker, and Muhammad Zeshan Afzal. Castabdetectors: Cascade network for table detection in document images with recursive feature pyramid and switchable atrous convolution. Journal of Imaging , 7(10), 2021. 1 +[9] Kaiming He, Georgia Gkioxari, Piotr Dollar, and Ross Girshick. Mask r-cnn. In Proceedings of the IEEE International Conference on Computer Vision (ICCV) , Oct 2017. 1 +[10] Yelin He, X. Qi, Jiaquan Ye, Peng Gao, Yihao Chen, Bingcong Li, Xin Tang, and Rong Xiao. Pingan-vcgroup's solution for icdar 2021 competition on scientific table image recognition to latex. ArXiv , abs/2105.01846, 2021. 2 +[11] Jianying Hu, Ramanujan S Kashi, Daniel P Lopresti, and Gordon Wilfong. Medium-independent table detection. In Document Recognition and Retrieval VII , volume 3967, pages 291-302. International Society for Optics and Photonics, 1999. 2 +[12] Matthew Hurst. 
A constraint-based approach to table structure derivation. In Proceedings of the Seventh International Conference on Document Analysis and Recognition - Volume 2 , ICDAR '03, page 911, USA, 2003. IEEE Computer Society. 2 +[13] Thotreingam Kasar, Philippine Barlas, Sebastien Adam, Cl'ement Chatelain, and Thierry Paquet. Learning to detect tables in scanned document images using line information. In 2013 12th International Conference on Document Analysis and Recognition , pages 1185-1189. IEEE, 2013. 2 +[14] Pratik Kayal, Mrinal Anand, Harsh Desai, and Mayank Singh. Icdar 2021 competition on scientific table image recognition to latex, 2021. 2 +[15] Harold W Kuhn. The hungarian method for the assignment problem. Naval research logistics quarterly , 2(1-2):83-97, 1955. 6 +[16] Girish Kulkarni, Visruth Premraj, Vicente Ordonez, Sagnik Dhar, Siming Li, Yejin Choi, Alexander C. Berg, and Tamara L. Berg. Babytalk: Understanding and generating simple image descriptions. IEEE Transactions on Pattern Analysis and Machine Intelligence , 35(12):2891-2903, 2013. 4 +[17] Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou, and Zhoujun Li. Tablebank: A benchmark dataset for table detection and recognition, 2019. 2, 3 +[18] Yiren Li, Zheng Huang, Junchi Yan, Yi Zhou, Fan Ye, and Xianhui Liu. Gfte: Graph-based financial table extraction. In Alberto Del Bimbo, Rita Cucchiara, Stan Sclaroff, Giovanni Maria Farinella, Tao Mei, Marco Bertini, Hugo Jair Escalante, and Roberto Vezzani, editors, Pattern Recognition. ICPR International Workshops and Challenges , pages 644-658, Cham, 2021. Springer International Publishing. 2, 3 +[19] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter Staar. Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence , 35(17):15137-15145, May 2021. 
1 +[20] Rujiao Long, Wen Wang, Nan Xue, Feiyu Gao, Zhibo Yang, Yongpan Wang, and Gui-Song Xia. Parsing table structures in the wild. In Proceedings of the IEEE/CVF International Conference on Computer Vision , pages 944-952, 2021. 2 +[21] Shubham Singh Paliwal, D Vishwanath, Rohit Rahul, Monika Sharma, and Lovekesh Vig. Tablenet: Deep learning model for end-to-end table detection and tabular data extraction from scanned document images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 128-133. IEEE, 2019. 1 +[22] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. Pytorch: An imperative style, high-performance deep learning library. In H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch'e-Buc, E. Fox, and R. Garnett, editors, Advances in Neural Information Processing Systems 32 , pages 8024-8035. Curran Associates, Inc., 2019. 6 +[23] Devashish Prasad, Ayan Gadpal, Kshitij Kapadni, Manish Visave, and Kavita Sultanpure. Cascadetabnet: An approach for end to end table detection and structure recognition from image-based documents. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops , pages 572-573, 2020. 1 +[24] Shah Rukh Qasim, Hassan Mahmood, and Faisal Shafait. Rethinking table recognition using graph neural networks. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 142-147. IEEE, 2019. 3 +[25] Hamid Rezatofighi, Nathan Tsoi, JunYoung Gwak, Amir Sadeghian, Ian Reid, and Silvio Savarese. Generalized intersection over union: A metric and a loss for bounding box regression. In Proceedings of the IEEE/CVF Conference on +Computer Vision and Pattern Recognition , pages 658-666, 2019. 
6 +[26] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 11621167, 2017. 1 +[27] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR international conference on document analysis and recognition (ICDAR) , volume 1, pages 1162-1167. IEEE, 2017. 3 +[28] Faisal Shafait and Ray Smith. Table detection in heterogeneous documents. In Proceedings of the 9th IAPR International Workshop on Document Analysis Systems , pages 6572, 2010. 2 +[29] Shoaib Ahmed Siddiqui, Imran Ali Fateh, Syed Tahseen Raza Rizvi, Andreas Dengel, and Sheraz Ahmed. Deeptabstr: Deep learning based table structure recognition. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1403-1409. IEEE, 2019. 3 +[30] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD , KDD '18, pages 774-782, New York, NY, USA, 2018. ACM. 1 +[31] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Ł ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett, editors, Advances in Neural Information Processing Systems 30 , pages 5998-6008. Curran Associates, Inc., 2017. 5 +[32] Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan. Show and tell: A neural image caption generator. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) , June 2015. 2 +[33] Wenyuan Xue, Qingyong Li, and Dacheng Tao. 
Res2tim: reconstruct syntactic structures from table images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 749-755. IEEE, 2019. 3 +[34] Wenyuan Xue, Baosheng Yu, Wen Wang, Dacheng Tao, and Qingyong Li. Tgrnet: A table graph reconstruction network for table structure recognition. arXiv preprint arXiv:2106.10598 , 2021. 3 +[35] Quanzeng You, Hailin Jin, Zhaowen Wang, Chen Fang, and Jiebo Luo. Image captioning with semantic attention. In Proceedings of the IEEE conference on computer vision and pattern recognition , pages 4651-4659, 2016. 4 +[36] Xinyi Zheng, Doug Burdick, Lucian Popa, Peter Zhong, and Nancy Xin Ru Wang. Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. Winter Conference for Applications in Computer Vision (WACV) , 2021. 2, 3 +[37] Xu Zhong, Elaheh ShafieiBavani, and Antonio Jimeno Yepes. Image-based table recognition: Data, model, +and evaluation. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision ECCV 2020 , pages 564-580, Cham, 2020. Springer International Publishing. 2, 3, 7 +[38] Xu Zhong, Jianbin Tang, and Antonio Jimeno Yepes. Publaynet: Largest dataset ever for document layout analysis. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1015-1022, 2019. 1 +TableFormer: Table Structure Understanding with Transformers +Supplementary Material +1. Details on the datasets +1.1. Data preparation +As a first step of our data preparation process, we have calculated statistics over the datasets across the following dimensions: (1) table size measured in the number of rows and columns, (2) complexity of the table, (3) strictness of the provided HTML structure and (4) completeness (i.e. no omitted bounding boxes). A table is considered to be simple if it does not contain row spans or column spans. 
Additionally, a table has a strict HTML structure if every row has the same number of columns after taking into account any row or column spans. Therefore a strict HTML structure looks always rectangular. However, HTML is a lenient encoding format, i.e. tables with rows of different sizes might still be regarded as correct due to implicit display rules. These implicit rules leave room for ambiguity, which we want to avoid. As such, we prefer to have "strict" tables, i.e. tables where every row has exactly the same length. +We have developed a technique that tries to derive a missing bounding box out of its neighbors. As a first step, we use the annotation data to generate the most fine-grained grid that covers the table structure. In case of strict HTML tables, all grid squares are associated with some table cell and in the presence of table spans a cell extends across multiple grid squares. When enough bounding boxes are known for a rectangular table, it is possible to compute the geometrical border lines between the grid rows and columns. Eventually this information is used to generate the missing bounding boxes. Additionally, the existence of unused grid squares indicates that the table rows have unequal number of columns and the overall structure is non-strict. The generation of missing bounding boxes for non-strict HTML tables is ambiguous and therefore quite challenging. Thus, we have decided to simply discard those tables. In case of PubTabNet we have computed missing bounding boxes for 48% of the simple and 69% of the complex tables. Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes. +Figure 7 illustrates the distribution of the tables across different dimensions per dataset. +1.2. Synthetic datasets +Aiming to train and evaluate our models in a broader spectrum of table data we have synthesized four types of datasets. 
Each one contains tables with different appear- +ances in regard to their size, structure, style and content. Every synthetic dataset contains 150k examples, summing up to 600k synthetic examples. All datasets are divided into Train, Test and Val splits (80%, 10%, 10%). +The process of generating a synthetic dataset can be decomposed into the following steps: +1. Prepare styling and content templates: The styling templates have been manually designed and organized into groups of scope specific appearances (e.g. financial data, marketing data, etc.) Additionally, we have prepared curated collections of content templates by extracting the most frequently used terms out of non-synthetic datasets (e.g. PubTabNet, FinTabNet, etc.). +2. Generate table structures: The structure of each synthetic dataset assumes a horizontal table header which potentially spans over multiple rows and a table body that may contain a combination of row spans and column spans. However, spans are not allowed to cross the header - body boundary. The table structure is described by the parameters: Total number of table rows and columns, number of header rows, type of spans (header only spans, row only spans, column only spans, both row and column spans), maximum span size and the ratio of the table area covered by spans. +3. Generate content: Based on the dataset theme , a set of suitable content templates is chosen first. Then, this content can be combined with purely random text to produce the synthetic content. +4. Apply styling templates: Depending on the domain of the synthetic dataset, a set of styling templates is first manually selected. Then, a style is randomly selected to format the appearance of the synthesized table. +5. Render the complete tables: The synthetic table is finally rendered by a web browser engine to generate the bounding boxes for each table cell. A batching technique is utilized to optimize the runtime overhead of the rendering process. +2. 
Prediction post-processing for PDF documents +Although TableFormer can predict the table structure and the bounding boxes for tables recognized inside PDF documents, this is not enough when a full reconstruction of the original table is required. This happens mainly due to the following reasons: +Figure 7: Distribution of the tables across different dimensions per dataset. Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity. +
+ +Figure 7: Distribution of the tables across different dimensions per dataset. Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity. +
+· TableFormer output does not include the table cell content. +· There are occasional inaccuracies in the predictions of the bounding boxes. +However, it is possible to mitigate those limitations by combining the TableFormer predictions with the information already present inside a programmatic PDF document. More specifically, PDF documents can be seen as a sequence of PDF cells where each cell is described by its content and bounding box. If we are able to associate the PDF cells with the predicted table cells, we can directly link the PDF cell content to the table cell structure and use the PDF bounding boxes to correct misalignments in the predicted table cell bounding boxes. +Here is a step-by-step description of the prediction postprocessing: +1. Get the minimal grid dimensions - number of rows and columns for the predicted table structure. This represents the most granular grid for the underlying table structure. +2. Generate pair-wise matches between the bounding boxes of the PDF cells and the predicted cells. The Intersection Over Union (IOU) metric is used to evaluate the quality of the matches. +3. Use a carefully selected IOU threshold to designate the matches as "good" ones and "bad" ones. +3.a. If all IOU scores in a column are below the threshold, discard all predictions (structure and bounding boxes) for that column. +4. Find the best-fitting content alignment for the predicted cells with good IOU per each column. The alignment of the column can be identified by the following formula: +where c is one of { left, centroid, right } and x$_{c}$ is the xcoordinate for the corresponding point. +5. Use the alignment computed in step 4, to compute the median x -coordinate for all table columns and the me- +dian cell size for all table cells. The usage of median during the computations, helps to eliminate outliers caused by occasional column spans which are usually wider than the normal. +6. 
Snap all cells with bad IOU to their corresponding median x -coordinates and cell sizes. +7. Generate a new set of pair-wise matches between the corrected bounding boxes and PDF cells. This time use a modified version of the IOU metric, where the area of the intersection between the predicted and PDF cells is divided by the PDF cell area. In case there are multiple matches for the same PDF cell, the prediction with the higher score is preferred. This covers the cases where the PDF cells are smaller than the area of predicted or corrected prediction cells. +8. In some rare occasions, we have noticed that TableFormer can confuse a single column as two. When the postprocessing steps are applied, this results with two predicted columns pointing to the same PDF column. In such case we must de-duplicate the columns according to highest total column intersection score. +9. Pick up the remaining orphan cells. There could be cases, when after applying all the previous post-processing steps, some PDF cells could still remain without any match to predicted cells. However, it is still possible to deduce the correct matching for an orphan PDF cell by mapping its bounding box on the geometry of the grid. This mapping decides if the content of the orphan cell will be appended to an already matched table cell, or a new table cell should be created to match with the orphan. +9a. Compute the top and bottom boundary of the horizontal band for each grid row (min/max y coordinates per row). +9b. Intersect the orphan's bounding box with the row bands, and map the cell to the closest grid row. +9c. Compute the left and right boundary of the vertical band for each grid column (min/max x coordinates per column). +9d. Intersect the orphan's bounding box with the column bands, and map the cell to the closest grid column. +9e. If the table cell under the identified row and column is not empty, extend its content with the content of the or- +phan cell. +9f. 
Otherwise create a new structural cell and match it with the orphan cell. +Additional images with examples of TableFormer predictions and post-processing can be found below. +Figure 8: Example of a table with multi-line header. +Figure 9: Example of a table with big empty distance between cells. +
+ +Figure 9: Example of a table with big empty distance between cells. +
+Figure 10: Example of a complex table with empty cells. +
+ +Figure 10: Example of a complex table with empty cells. +
+
+ +
+Figure 11: Simple table with different style and empty cells. +
+ +Figure 11: Simple table with different style and empty cells. +
+Figure 12: Simple table predictions and post processing. +
+ +Figure 12: Simple table predictions and post processing. +
+Figure 13: Table predictions example on colorful table. +Figure 14: Example with multi-line text. +
+ +Figure 14: Example with multi-line text. +
+Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact. +
+ +Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact. +
+
+ +
+Figure 15: Example with triangular table. +
+ +Figure 15: Example with triangular table. +
+Figure 17: Example of long table. End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure. +
+ +Figure 17: Example of long table. End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure. +
+
\ No newline at end of file diff --git a/tests/data/2203.01017v2.json b/tests/data/2203.01017v2.json index 67b89260..02602086 100644 --- a/tests/data/2203.01017v2.json +++ b/tests/data/2203.01017v2.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2203.01017v2.pdf", "filename-prov": null, "document-hash": "4fa8dff93d74a84069210c84a38d14d62a39ec8f4e4c90bf955fdebdebcf6636", "#-pages": 16, "collection-name": null, "description": null, "page-hashes": [{"hash": "5deca8f7af439d2d968a480d07761ace8f704461e79d8b3d1dce2c394acdeab7", "model": "default", "page": 1}, {"hash": "81bd44713b62df481eaab1ac092cbc8b66359e53c7ecd637bb30d2680b1d2692", "model": "default", "page": 2}, {"hash": "95b5360d00f9fbcd6d5c5caa4529279e7f31219fd75e4495a349a1897700a2fe", "model": "default", "page": 3}, {"hash": "6d4e2424458b083b36c5559a7fe1a42175b082247c516ca8fef9f0d46e6f0bbc", "model": "default", "page": 4}, {"hash": "50115d582a0897fe1dd520a6876ec3f9321690ed0f6cfdc99a8d09019be073e8", "model": "default", "page": 5}, {"hash": "7d6c3a5e249a7f5de56f840b4ea97322f82ca158f6360d03a04a515a575334ab", "model": "default", "page": 6}, {"hash": "ccc222216b8699749c3cb8165aea097d4534eb5d136b2b41263632b1cfb39c67", "model": "default", "page": 7}, {"hash": "73792a09917cca8042a12d1e86bbd2c3c4ddc52d7a150b51940e5231e643bfb5", "model": "default", "page": 8}, {"hash": "8f623b1d6519eb087acf7a13bbe305f093837ba8d14d17cc1af3d091f98a0622", "model": "default", "page": 9}, {"hash": "bde30f21fc04de83c8bd77c8c61fae7f5f2586beb9f5bf346025d2f819269221", "model": "default", "page": 
10}, {"hash": "c95eeaf7e1e6efd5a3d169b8914ffb8cb9e9fb82f8dbbae9e98873c3261df57a", "model": "default", "page": 11}, {"hash": "b0db1f70185308047bcdc86e8a515dab11ea727d6819da16fa24c3c829dc4b1c", "model": "default", "page": 12}, {"hash": "42e3d141f1ce66ee82a1447ce816b5a086f75362e6066155739437b058be8c7b", "model": "default", "page": 13}, {"hash": "41b546ffa2bea0771a5c77de1ca64c766ddc4305dd0316993b34b640b686ee06", "model": "default", "page": 14}, {"hash": "c5f2076bcd18075927d93d81dc83d2a5a0f3fdf1085d2f51e3ad10bdd6ad90bc", "model": "default", "page": 15}, {"hash": "0e6e359322b6c285571833316a3dfee50f7139f0ea088d026e0007cd2a679992", "model": "default", "page": 16}]}, "main-text": [{"text": "arXiv:2203.01017v2 [cs.CV] 11 Mar 2022", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [16.783903121948242, 231.99996948242188, 36.339778900146484, 584.1799926757812], "page": 1, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "TableFormer: Table Structure Understanding with Transformers.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [95.52632904052734, 672.0686645507812, 498.9270935058594, 685.8598022460938], "page": 1, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "Ahmed Nassar, Nikolaos Livathinos, Maksym Lysak, Peter Staar IBM Research", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [141.79666137695312, 620.6796264648438, 453.0020751953125, 646.2996826171875], "page": 1, "span": [0, 73], "__ref_s3_data": null}]}, {"text": "{ ahn,nli,mly,taa } @zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [208.1230010986328, 606.532470703125, 379.310791015625, 616.525390625], "page": 1, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "Abstract", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [145.03118896484375, 565.769287109375, 190.65908813476562, 576.84765625], "page": 1, "span": [0, 8], 
"__ref_s3_data": null}]}, {"text": "a. Picture of a table:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [315.37042236328125, 565.2451782226562, 408.4407043457031, 575.1142578125], "page": 1, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "1. Introduction", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [50.111976623535156, 241.30950927734375, 126.94803619384766, 252.81288146972656], "page": 1, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "The occurrence of tables in documents is ubiquitous. They often summarise quantitative or factual data, which is cumbersome to describe in verbose text but nevertheless extremely valuable. Unfortunately, this compact representation is often not easy to parse by machines. There are many implicit conventions used to obtain a compact table representation. For example, tables often have complex columnand row-headers in order to reduce duplicated cell content. Lines of different shapes and sizes are leveraged to separate content or indicate a tree structure. Additionally, tables can also have empty/missing table-entries or multi-row textual table-entries. Fig. 1 shows a table which presents all these issues.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.18265151977539, 78.84822082519531, 286.3650817871094, 232.2576904296875], "page": 1, "span": [0, 712], "__ref_s3_data": null}]}, {"text": "Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graph's, etc, since they enhance their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, different variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. 
In this paper, we present a new table-structure identification model. The latter improves the latest end-toend deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDF's directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-english tables. Second, we replace the LSTM decoders with transformer based decoders. This upgrade improves significantly the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [48.88529968261719, 277.8124694824219, 286.7518310546875, 551.832275390625], "page": 1, "span": [0, 1320], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "b. Red-annotation of bounding boxes, Blue-predictions by TableFormer", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [315.25408935546875, 458.4998779296875, 486.4019470214844, 479.3412780761719], "page": 1, "span": [0, 68], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"text": "c. Structure predicted by TableFormer:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [315.3083801269531, 362.0699157714844, 491.1912536621094, 372.62310791015625], "page": 1, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cell with multi-row text and (3) cells with no content. 
Image from PubTabNet evaluation set, filename: 'PMC2944238 004 02'.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [307.8612060546875, 232.7270965576172, 545.1151733398438, 278.37225341796875], "page": 1, "span": [0, 220], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"text": "Recently, significant progress has been made with vision based approaches to extract tables in documents. For the sake of completeness, the issue of table extraction from documents is typically decomposed into two separate challenges, i.e. (1) finding the location of the table(s) on a document-page and (2) finding the structure of a given table in the document.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.8420104980469, 126.95307159423828, 545.50439453125, 208.4013671875], "page": 1, "span": [0, 363], "__ref_s3_data": null}]}, {"text": "The first problem is called table-location and has been previously addressed [30, 38, 19, 21, 23, 26, 8] with stateof-the-art object-detection networks (e.g. YOLO and later on Mask-RCNN [9]). For all practical purposes, it can be", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.9032287597656, 78.15805053710938, 545.4091796875, 124.39737701416016], "page": 1, "span": [0, 229], "__ref_s3_data": null}]}, {"text": "1", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [295.1210021972656, 48.9600715637207, 300.102294921875, 58.62150192260742], "page": 1, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "considered as a solved problem, given enough ground-truth data to train on.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.38566970825195, 695.9300537109375, 286.36505126953125, 717.7666015625], "page": 2, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "The second problem is called table-structure decomposition. 
The latter is a long standing problem in the community of document understanding [6, 4, 14]. Contrary to the table-location problem, there are no commonly used approaches that can easily be re-purposed to solve this problem. Lately, a set of new model-architectures has been proposed by the community to address table-structure decomposition [37, 36, 18, 20]. All these models have some weaknesses (see Sec. 2). The common denominator here is the reliance on textual features and/or the inability to provide the bounding box of each table-cell in the original image.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.321495056152344, 563.8181762695312, 286.3651428222656, 693.599365234375], "page": 2, "span": [0, 626], "__ref_s3_data": null}]}, {"text": "In this paper, we want to address these weaknesses and present a robust table-structure decomposition algorithm. The design criteria for our model are the following. First, we want our algorithm to be language agnostic. In this way, we can obtain the structure of any table, irregardless of the language. Second, we want our algorithm to leverage as much data as possible from the original PDF document. For programmatic PDF documents, the text-cells can often be extracted much faster and with higher accuracy compared to OCR methods. Last but not least, we want to have a direct link between the table-cell and its bounding box in the image.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.25040817260742, 420.054931640625, 286.4363708496094, 561.5592651367188], "page": 2, "span": [0, 643], "__ref_s3_data": null}]}, {"text": "To meet the design criteria listed above, we developed a new model called TableFormer and a synthetically generated table structure dataset called SynthTabNet $^{1}$. 
In particular, our contributions in this work can be summarised as follows:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.561458587646484, 359.8269958496094, 286.3665771484375, 417.9549255371094], "page": 2, "span": [0, 242], "__ref_s3_data": null}]}, {"text": "\u00b7 We propose TableFormer , a transformer based model that predicts tables structure and bounding boxes for the table content simultaneously in an end-to-end approach.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.259342193603516, 302.3851318359375, 286.62158203125, 348.7479553222656], "page": 2, "span": [0, 166], "__ref_s3_data": null}]}, {"text": "\u00b7 Across all benchmark datasets TableFormer significantly outperforms existing state-of-the-art metrics, while being much more efficient in training and inference to existing works.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.2220458984375, 244.87574768066406, 286.3648986816406, 291.1054992675781], "page": 2, "span": [0, 181], "__ref_s3_data": null}]}, {"text": "\u00b7 We present SynthTabNet a synthetically generated dataset, with various appearance styles and complexity.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.282081604003906, 199.1899871826172, 286.36492919921875, 233.3485107421875], "page": 2, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "\u00b7 An augmented dataset based on PubTabNet [37], FinTabNet [36], and TableBank [17] with generated ground-truth for reproducibility.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.136634826660156, 153.24447631835938, 286.3650817871094, 187.8387451171875], "page": 2, "span": [0, 131], "__ref_s3_data": null}]}, {"text": "The paper is structured as follows. In Sec. 2, we give a brief overview of the current state-of-the-art. In Sec. 3, we describe the datasets on which we train. In Sec. 
4, we introduce the TableFormer model-architecture and describe", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.235233306884766, 96.42024230957031, 286.66033935546875, 142.322998046875], "page": 2, "span": [0, 231], "__ref_s3_data": null}]}, {"text": "$^{1}$https://github.com/IBM/SynthTabNet", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [60.97100067138672, 78.53706359863281, 183.7305450439453, 87.67019653320312], "page": 2, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "2", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.6210632324219, 48.96015548706055, 300.1224365234375, 58.64695739746094], "page": 2, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "its results & performance in Sec. 5. As a conclusion, we describe how this new model-architecture can be re-purposed for other tasks in the computer-vision community.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.2301025390625, 683.5604248046875, 545.4613037109375, 717.81787109375], "page": 2, "span": [0, 166], "__ref_s3_data": null}]}, {"text": "2. Previous work and State of the Art", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [307.9626770019531, 659.5203247070312, 498.43707275390625, 671.0046997070312], "page": 2, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Identifying the structure of a table has been an outstanding problem in the document-parsing community, that motivates many organised public challenges [6, 4, 14]. The difficulty of the problem can be attributed to a number of factors. First, there is a large variety in the shapes and sizes of tables. Such large variety requires a flexible method. This is especially true for complex column- and row headers, which can be extremely intricate and demanding. A second factor of complexity is the lack of data with regard to table-structure. 
Until the publication of PubTabNet [37], there were no large datasets (i.e. > 100 K tables) that provided structure information. This happens primarily due to the fact that tables are notoriously time-consuming to annotate by hand. However, this has definitely changed in recent years with the deliverance of PubTabNet [37], FinTabNet [36], TableBank [17] etc.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.70526123046875, 461.54498291015625, 545.4183959960938, 651.0844116210938], "page": 2, "span": [0, 901], "__ref_s3_data": null}]}, {"text": "Before the rising popularity of deep neural networks, the community relied heavily on heuristic and/or statistical methods to do table structure identification [3, 7, 11, 5, 13, 28]. Although such methods work well on constrained tables [12], a more data-driven approach can be applied due to the advent of convolutional neural networks (CNNs) and the availability of large datasets. To the best-of-our knowledge, there are currently two different types of network architecture that are being pursued for state-of-the-art tablestructure identification.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.6594543457031, 341.9270935058594, 545.2876586914062, 459.7470397949219], "page": 2, "span": [0, 552], "__ref_s3_data": null}]}, {"text": "Image-to-Text networks : In this type of network, one predicts a sequence of tokens starting from an encoded image. Such sequences of tokens can be HTML table tags [37, 17] or LaTeX symbols[10]. The choice of symbols is ultimately not very important, since one can be transformed into the other. There are however subtle variations in the Image-to-Text networks. The easiest network architectures are \"image-encoder \u2192 text-decoder\" (IETD), similar to network architectures that try to provide captions to images [32]. In these IETD networks, one expects as output the LaTeX/HTML string of the entire table, i.e. 
the symbols necessary for creating the table with the content of the table. Another approach is the \"image-encoder \u2192 dual decoder\" (IEDD) networks. In these type of networks, one has two consecutive decoders with different purposes. The first decoder is the tag-decoder , i.e. it only produces the HTML/LaTeX tags which construct an empty table. The second content-decoder uses the encoding of the image in combination with the output encoding of each cell-tag (from the tag-decoder ) to generate the textual content of each table cell. The network architecture of IEDD is certainly more elaborate, but it has the advantage that one can pre-train the", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.7598571777344, 78.7093505859375, 545.48876953125, 340.0845947265625], "page": 2, "span": [0, 1262], "__ref_s3_data": null}]}, {"text": "tag-decoder which is constrained to the table-tags.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.48567581176758, 707.8850708007812, 250.15101623535156, 717.7049560546875], "page": 3, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "In practice, both network architectures (IETD and IEDD) require an implicit, custom trained object-characterrecognition (OCR) to obtain the content of the table-cells. In the case of IETD, this OCR engine is implicit in the decoder similar to [24]. For the IEDD, the OCR is solely embedded in the content-decoder. This reliance on a custom, implicit OCR decoder is of course problematic. OCR is a well known and extremely tough problem, that often needs custom training for each individual language. However, the limited availability for non-english content in the current datasets, makes it impractical to apply the IETD and IEDD methods on tables with other languages. Additionally, OCR can be completely omitted if the tables originate from programmatic PDF documents with known positions of each cell. 
The latter was the inspiration for the work of this paper.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.11482238769531, 515.9454345703125, 286.3651428222656, 706.0012817382812], "page": 3, "span": [0, 864], "__ref_s3_data": null}]}, {"text": "Graph Neural networks : Graph Neural networks (GNN's) take a radically different approach to tablestructure extraction. Note that one table cell can constitute out of multiple text-cells. To obtain the table-structure, one creates an initial graph, where each of the text-cells becomes a node in the graph similar to [33, 34, 2]. Each node is then associated with en embedding vector coming from the encoded image, its coordinates and the encoded text. Furthermore, nodes that represent adjacent text-cells are linked. Graph Convolutional Networks (GCN's) based methods take the image as an input, but also the position of the text-cells and their content [18]. The purpose of a GCN is to transform the input graph into a new graph, which replaces the old links with new ones. The new links then represent the table-structure. With this approach, one can avoid the need to build custom OCR decoders. However, the quality of the reconstructed structure is not comparable to the current state-of-the-art [18].", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.13818359375, 301.297119140625, 286.5478210449219, 514.357421875], "page": 3, "span": [0, 1007], "__ref_s3_data": null}]}, {"text": "Hybrid Deep Learning-Rule-Based approach : A popular current model for table-structure identification is the use of a hybrid Deep Learning-Rule-Based approach similar to [27, 29]. In this approach, one first detects the position of the table-cells with object detection (e.g. YoloVx or MaskRCNN), then classifies the table into different types (from its images) and finally uses different rule-sets to obtain its table-structure. 
Currently, this approach achieves stateof-the-art results, but is not an end-to-end deep-learning method. As such, new rules need to be written if different types of tables are encountered.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.34700393676758, 169.18182373046875, 286.6784362792969, 299.23980712890625], "page": 3, "span": [0, 619], "__ref_s3_data": null}]}, {"text": "3. Datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.34483337402344, 145.30743408203125, 105.30262756347656, 156.634765625], "page": 3, "span": [0, 11], "__ref_s3_data": null}]}, {"text": "We rely on large-scale datasets such as PubTabNet [37], FinTabNet [36], and TableBank [17] datasets to train and evaluate our models. These datasets span over various appearance styles and content. We also introduce our own synthetically generated SynthTabNet dataset to fix an im-", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.3426628112793, 78.83715057373047, 286.368896484375, 136.55197143554688], "page": 3, "span": [0, 281], "__ref_s3_data": null}]}, {"text": "3", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.4361877441406, 48.96023941040039, 300.102294921875, 58.61145782470703], "page": 3, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [308.0231628417969, 503.3020935058594, 545.1151123046875, 524.915283203125], "page": 3, "span": [0, 104], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"text": "balance in the previous datasets.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.9623718261719, 465.4779968261719, 437.27001953125, 475.4662170410156], "page": 3, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "The 
PubTabNet dataset contains 509k tables delivered as annotated PNG images. The annotations consist of the table structure represented in HTML format, the tokenized text and its bounding boxes per table cell. Fig. 1 shows the appearance style of PubTabNet. Depending on its complexity, a table is characterized as \"simple\" when it does not contain row spans or column spans, otherwise it is \"complex\". The dataset is divided into Train and Val splits (roughly 98% and 2%). The Train split consists of 54% simple and 46% complex tables and the Val split of 51% and 49% respectively. The FinTabNet dataset contains 112k tables delivered as single-page PDF documents with mixed table structures and text content. Similarly to the PubTabNet, the annotations of FinTabNet include the table structure in HTML, the tokenized text and the bounding boxes on a table cell basis. The dataset is divided into Train, Test and Val splits (81%, 9.5%, 9.5%), and each one is almost equally divided into simple and complex tables (Train: 48% simple, 52% complex, Test: 48% simple, 52% complex, Test: 53% simple, 47% complex). Finally the TableBank dataset consists of 145k tables provided as JPEG images. The latter has annotations for the table structure, but only few with bounding boxes of the table cells. The entire dataset consists of simple tables and it is divided into 90% Train, 3% Test and 7% Val splits.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.9255065917969, 164.26060485839844, 545.6851196289062, 461.7036437988281], "page": 3, "span": [0, 1400], "__ref_s3_data": null}]}, {"text": "Due to the heterogeneity across the dataset formats, it was necessary to combine all available data into one homogenized dataset before we could train our models for practical purposes. Given the size of PubTabNet, we adopted its annotation format and we extracted and converted all tables as PNG images with a resolution of 72 dpi. 
Additionally, we have filtered out tables with extreme sizes due to small", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.7597351074219, 78.84823608398438, 545.282958984375, 160.3302001953125], "page": 3, "span": [0, 406], "__ref_s3_data": null}]}, {"text": "amount of such tables, and kept only those ones ranging between 1*1 and 20*10 (rows/columns).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.14799118041992, 695.9300537109375, 286.3651123046875, 717.7029418945312], "page": 4, "span": [0, 93], "__ref_s3_data": null}]}, {"text": "The availability of the bounding boxes for all table cells is essential to train our models. In order to distinguish between empty and non-empty bounding boxes, we have introduced a binary class in the annotation. Unfortunately, the original datasets either omit the bounding boxes for whole tables (e.g. TableBank) or they narrow their scope only to non-empty cells. Therefore, it was imperative to introduce a data pre-processing procedure that generates the missing bounding boxes out of the annotation information. This procedure first parses the provided table structure and calculates the dimensions of the most fine-grained grid that covers the table structure. Notice that each table cell may occupy multiple grid squares due to row or column spans. In case of PubTabNet we had to compute missing bounding boxes for 48% of the simple and 69% of the complex tables. Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.21862030029297, 478.8949279785156, 286.5638732910156, 691.9474487304688], "page": 4, "span": [0, 983], "__ref_s3_data": null}]}, {"text": "As it is illustrated in Fig. 2, the table distributions from all datasets are skewed towards simpler structures with fewer number of rows/columns. 
Additionally, there is very limited variance in the table styles, which in case of PubTabNet and FinTabNet means one styling format for the majority of the tables. Similar limitations appear also in the type of table content, which in some cases (e.g. FinTabNet) is restricted to a certain domain. Ultimately, the lack of diversity in the training dataset damages the ability of the models to generalize well on unseen data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.344085693359375, 356.89642333984375, 286.75341796875, 474.9582214355469], "page": 4, "span": [0, 571], "__ref_s3_data": null}]}, {"text": "Motivated by those observations we aimed at generating a synthetic table dataset named SynthTabNet . This approach offers control over: 1) the size of the dataset, 2) the table structure, 3) the table style and 4) the type of content. The complexity of the table structure is described by the size of the table header and the table body, as well as the percentage of the table cells covered by row spans and column spans. A set of carefully designed styling templates provides the basis to build a wide range of table appearances. Lastly, the table content is generated out of a curated collection of text corpora. By controlling the size and scope of the synthetic datasets we are able to train and evaluate our models in a variety of different conditions. For example, we can first generate a highly diverse dataset to train our models and then evaluate their performance on other synthetic datasets which are focused on a specific domain.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.167423248291016, 164.30380249023438, 286.73486328125, 353.8109436035156], "page": 4, "span": [0, 941], "__ref_s3_data": null}]}, {"text": "In this regard, we have prepared four synthetic datasets, each one containing 150k examples. 
The corpora to generate the table text consists of the most frequent terms appearing in PubTabNet and FinTabNet together with randomly generated text. The first two synthetic datasets have been fine-tuned to mimic the appearance of the original datasets but encompass more complicated table structures. The third", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.45711135864258, 78.84810638427734, 286.5352783203125, 160.2691650390625], "page": 4, "span": [0, 405], "__ref_s3_data": null}]}, {"text": "4", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.497802734375, 48.96018600463867, 300.2393798828125, 58.43728256225586], "page": 4, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Table 1: Both \"Combined-Tabnet\" and \"CombinedTabnet\" are variations of the following: (*) The CombinedTabnet dataset is the processed combination of PubTabNet and Fintabnet. (**) The combined dataset is the processed combination of PubTabNet, Fintabnet and TableBank.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [307.6622009277344, 567.6110229492188, 545.1150512695312, 625.0201416015625], "page": 4, "span": [0, 267], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"text": "one adopts a colorful appearance with high contrast and the last one contains tables with sparse content. Lastly, we have combined all synthetic datasets into one big unified synthetic dataset of 600k examples.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.8670349121094, 497.6080322265625, 545.1443481445312, 542.9603271484375], "page": 4, "span": [0, 210], "__ref_s3_data": null}]}, {"text": "Tab. 1 summarizes the various attributes of the datasets.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [320.144287109375, 485.321044921875, 542.7439575195312, 494.8341979980469], "page": 4, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "4. 
The TableFormer model", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [307.9104309082031, 460.0683288574219, 444.9360656738281, 471.7930908203125], "page": 4, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "Given the image of a table, TableFormer is able to predict: 1) a sequence of tokens that represent the structure of a table, and 2) a bounding box coupled to a subset of those tokens. The conversion of an image into a sequence of tokens is a well-known task [35, 16]. While attention is often used as an implicit method to associate each token of the sequence with a position in the original image, an explicit association between the individual table-cells and the image bounding boxes is also required.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.71002197265625, 344.3941345214844, 545.5623779296875, 451.22589111328125], "page": 4, "span": [0, 504], "__ref_s3_data": null}]}, {"text": "4.1. Model architecture.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [307.70916748046875, 324.45367431640625, 420.16058349609375, 334.8069763183594], "page": 4, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "We now describe in detail the proposed method, which is composed of three main components, see Fig. 4. Our CNN Backbone Network encodes the input as a feature vector of predefined length. The input feature vector of the encoded image is passed to the Structure Decoder to produce a sequence of HTML tags that represent the structure of the table. With each prediction of an HTML standard data cell (' < td > ') the hidden state of that cell is passed to the Cell BBox Decoder. As for spanning cells, such as row or column span, the tag is broken down to ' < ', 'rowspan=' or 'colspan=', with the number of spanning cells (attribute), and ' > '. The hidden state attached to ' < ' is passed to the Cell BBox Decoder. 
A shared feed forward network (FFN) receives the hidden states from the Structure Decoder, to provide the final detection predictions of the bounding box coordinates and their classification.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.7804870605469, 127.00019073486328, 545.635986328125, 316.6053466796875], "page": 4, "span": [0, 907], "__ref_s3_data": null}]}, {"text": "CNN Backbone Network. A ResNet-18 CNN is the backbone that receives the table image and encodes it as a vector of predefined length. The network has been modified by removing the linear and pooling layer, as we are not per-", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.7881164550781, 78.76947021484375, 545.50244140625, 124.62178039550781], "page": 4, "span": [0, 223], "__ref_s3_data": null}]}, {"text": "Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. The bounding boxes grabs the content from the PDF and inserts it in the structure.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.26235580444336, 566.526611328125, 545.1981201171875, 588.8561401367188], "page": 5, "span": [0, 212], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"text": "Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. 
Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.09820556640625, 111.42295837402344, 286.6905822753906, 265.1697998046875], "page": 5, "span": [0, 745], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"text": "forming classification, and adding an adaptive pooling layer of size 28*28. ResNet by default downsamples the image resolution by 32 and then the encoded image is provided to both the Structure Decoder , and Cell BBox Decoder .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.04541015625, 497.69305419921875, 545.3578491210938, 543.414794921875], "page": 5, "span": [0, 227], "__ref_s3_data": null}]}, {"text": "Structure Decoder. The transformer architecture of this component is based on the work proposed in [31]. After extensive experimentation, the Structure Decoder is modeled as a transformer encoder with two encoder layers and a transformer decoder made from a stack of 4 decoder layers that comprise mainly of multi-head attention and feed forward layers. This configuration uses fewer layers and heads in comparison to networks applied to other problems (e.g. \"Scene Understanding\", \"Image Captioning\"), something which we relate to the simplicity of table images.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.867919921875, 377.2391662597656, 545.4237060546875, 495.6871337890625], "page": 5, "span": [0, 563], "__ref_s3_data": null}]}, {"text": "The transformer encoder receives an encoded image from the CNN Backbone Network and refines it through a multi-head dot-product attention layer, followed by a Feed Forward Network. 
During training, the transformer decoder receives as input the output feature produced by the transformer encoder, and the tokenized input of the HTML ground-truth tags. Using a stack of multi-head attention layers, different aspects of the tag sequence could be inferred. This is achieved by each attention head on a layer operating in a different subspace, and then combining altogether their attention score.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.0282287597656, 246.4272918701172, 545.38037109375, 375.6052551269531], "page": 5, "span": [0, 592], "__ref_s3_data": null}]}, {"text": "Cell BBox Decoder. Our architecture allows to simultaneously predict HTML tags and bounding boxes for each table cell without the need of a separate object detector end to end. This approach is inspired by DETR [1] which employs a Transformer Encoder, and Decoder that looks for a specific number of object queries (potential object detections). As our model utilizes a transformer architecture, the hidden state of the < td > ' and ' < ' HTML structure tags become the object query.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.9319152832031, 138.2592010498047, 545.4405517578125, 244.52415466308594], "page": 5, "span": [0, 483], "__ref_s3_data": null}]}, {"text": "The encoding generated by the CNN Backbone Network along with the features acquired for every data cell from the Transformer Decoder are then passed to the attention network. The attention network takes both inputs and learns to provide an attention weighted encoding. 
This weighted at-", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.91424560546875, 78.50503540039062, 545.3233032226562, 136.1419219970703], "page": 5, "span": [0, 286], "__ref_s3_data": null}]}, {"text": "5", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.6537170410156, 48.96027755737305, 300.10223388671875, 58.62459945678711], "page": 5, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "tention encoding is then multiplied to the encoded image to produce a feature for each table cell. Notice that this is different than the typical object detection problem where imbalances between the number of detections and the amount of objects may exist. In our case, we know up front that the produced detections always match with the table cells in number and correspondence.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.33885192871094, 635.6671752929688, 286.5264587402344, 717.6875610351562], "page": 6, "span": [0, 380], "__ref_s3_data": null}]}, {"text": "The output features for each table cell are then fed into the feed-forward network (FFN). The FFN consists of a Multi-Layer Perceptron (3 layers with ReLU activation function) that predicts the normalized coordinates for the bounding box of each table cell. Finally, the predicted bounding boxes are classified based on whether they are empty or not using a linear layer.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.410404205322266, 551.5345458984375, 286.57147216796875, 632.9039916992188], "page": 6, "span": [0, 371], "__ref_s3_data": null}]}, {"text": "Loss Functions. We formulate a multi-task loss Eq. 2 to train our network. The Cross-Entropy loss (denoted as l$_{s}$ ) is used to train the Structure Decoder which predicts the structure tokens. As for the Cell BBox Decoder it is trained with a combination of losses denoted as l$_{box}$ . 
l$_{box}$ consists of the generally used l$_{1}$ loss for object detection and the IoU loss ( l$_{iou}$ ) to be scale invariant as explained in [25]. In comparison to DETR, we do not use the Hungarian algorithm [15] to match the predicted bounding boxes with the ground-truth boxes, as we have already achieved a one-toone match through two steps: 1) Our token input sequence is naturally ordered, therefore the hidden states of the table data cells are also in order when they are provided as input to the Cell BBox Decoder , and 2) Our bounding boxes generation mechanism (see Sec. 3) ensures a one-to-one mapping between the cell content and its bounding box for all post-processed datasets.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.193443298339844, 347.69293212890625, 286.7899475097656, 548.953125], "page": 6, "span": [0, 985], "__ref_s3_data": null}]}, {"text": "The loss used to train the TableFormer can be defined as following:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.70558166503906, 323.12811279296875, 286.364990234375, 344.9725341796875], "page": 6, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "l$_{box}$ = \u03bb$_{iou}$l$_{iou}$ + \u03bb$_{l}$$_{1}$ l = \u03bbl$_{s}$ + (1 - \u03bb ) l$_{box}$ (1)", "type": "equation", "name": "Formula", "font": null, "prov": [{"bbox": [123.6725845336914, 274.2152404785156, 286.3624267578125, 299.7832336425781], "page": 6, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "where \u03bb \u2208 [0, 1], and \u03bb$_{iou}$, \u03bb$_{l}$$_{1}$ \u2208$_{R}$ are hyper-parameters.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.33706283569336, 251.3388671875, 281.596923828125, 261.4623718261719], "page": 6, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "5. 
Experimental Results", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.722251892089844, 225.24951171875, 172.0424041748047, 236.80613708496094], "page": 6, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "5.1. Implementation Details", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.44451904296875, 205.55343627929688, 179.17501831054688, 216.1990509033203], "page": 6, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "TableFormer uses ResNet-18 as the CNN Backbone Network . The input images are resized to 448*448 pixels and the feature map has a dimension of 28*28. Additionally, we enforce the following input constraints:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.39978790283203, 150.96865844726562, 286.6137390136719, 196.80064392089844], "page": 6, "span": [0, 207], "__ref_s3_data": null}]}, {"text": "Image width and height \u2264 1024 pixels Structural tags length \u2264 512 tokens. (2)", "type": "equation", "name": "Formula", "font": null, "prov": [{"bbox": [91.03668212890625, 113.60411834716797, 286.3624572753906, 138.33775329589844], "page": 6, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "Although input constraints are used also by other methods, such as EDD, ours are less restrictive due to the improved", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.08633041381836, 78.79530334472656, 286.4946594238281, 100.13447570800781], "page": 6, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "6", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.5410461425781, 48.96010971069336, 300.3391418457031, 58.42277908325195], "page": 6, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "runtime performance and lower memory footprint of TableFormer. 
This allows to utilize input samples with longer sequences and images with larger dimensions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.90777587890625, 683.57861328125, 545.310302734375, 717.6608276367188], "page": 6, "span": [0, 156], "__ref_s3_data": null}]}, {"text": "The Transformer Encoder consists of two \"Transformer Encoder Layers\", with an input feature size of 512, feed forward network of 1024, and 4 attention heads. As for the Transformer Decoder it is composed of four \"Transformer Decoder Layers\" with similar input and output dimensions as the \"Transformer Encoder Layers\". Even though our model uses fewer layers and heads than the default implementation parameters, our extensive experimentation has proved this setup to be more suitable for table images. We attribute this finding to the inherent design of table images, which contain mostly lines and text, unlike the more elaborate content present in other scopes (e.g. the COCO dataset). Moreover, we have added ResNet blocks to the inputs of the Structure Decoder and Cell BBox Decoder. This prevents a decoder having a stronger influence over the learned weights which would damage the other prediction task (structure vs bounding boxes), but learn task specific weights instead. Lastly our dropout layers are set to 0.5.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.8103332519531, 463.4059143066406, 545.45947265625, 677.0466918945312], "page": 6, "span": [0, 1024], "__ref_s3_data": null}]}, {"text": "For training, TableFormer is trained with 3 Adam optimizers, each one for the CNN Backbone Network , Structure Decoder , and Cell BBox Decoder . Taking the PubTabNet as an example for our parameter set up, the initializing learning rate is 0.001 for 12 epochs with a batch size of 24, and \u03bb set to 0.5. 
Afterwards, we reduce the learning rate to 0.0001, the batch size to 18 and train for 12 more epochs or convergence.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.79974365234375, 362.4031982421875, 545.5565185546875, 456.2726745605469], "page": 6, "span": [0, 419], "__ref_s3_data": null}]}, {"text": "TableFormer is implemented with PyTorch and Torchvision libraries [22]. To speed up the inference, the image undergoes a single forward pass through the CNN Backbone Network and transformer encoder. This eliminates the overhead of generating the same features for each decoding step. Similarly, we employ a 'caching' technique to preform faster autoregressive decoding. This is achieved by storing the features of decoded tokens so we can reuse them for each time step. Therefore, we only compute the attention for each new tag.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.99951171875, 237.81484985351562, 545.37939453125, 355.8169250488281], "page": 6, "span": [0, 528], "__ref_s3_data": null}]}, {"text": "5.2. Generalization", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [308.2174377441406, 202.5936279296875, 397.44281005859375, 212.9311981201172], "page": 6, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "TableFormer is evaluated on three major publicly available datasets of different nature to prove the generalization and effectiveness of our model. The datasets used for evaluation are the PubTabNet, FinTabNet and TableBank which stem from the scientific, financial and general domains respectively.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.0255126953125, 119.17848205566406, 545.1151733398438, 189.6819610595703], "page": 6, "span": [0, 299], "__ref_s3_data": null}]}, {"text": "We also share our baseline results on the challenging SynthTabNet dataset. Throughout our experiments, the same parameters stated in Sec. 
5.1 are utilized.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.2952575683594, 78.51296997070312, 545.2591552734375, 112.20266723632812], "page": 6, "span": [0, 155], "__ref_s3_data": null}]}, {"text": "5.3. Datasets and Metrics", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.5137939453125, 707.74658203125, 167.9048614501953, 718.37353515625], "page": 7, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "The Tree-Edit-Distance-Based Similarity (TEDS) metric was introduced in [37]. It represents the prediction, and ground-truth as a tree structure of HTML tags. This similarity is calculated as:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.42784881591797, 653.5985107421875, 286.3651123046875, 699.5994873046875], "page": 7, "span": [0, 192], "__ref_s3_data": null}]}, {"text": "TEDS ( T$_{a}$, T$_{b}$ ) = 1 - EditDist ( T$_{a}$, T$_{b}$ ) max ( | T$_{a}$ | , | T$_{b}$ | ) (3)", "type": "equation", "name": "Formula", "font": null, "prov": [{"bbox": [85.80982208251953, 618.6690063476562, 286.3623962402344, 642.4047241210938], "page": 7, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "where T$_{a}$ and T$_{b}$ represent tables in tree structure HTML format. EditDist denotes the tree-edit distance, and | T | represents the number of nodes in T .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.60887908935547, 578.02099609375, 286.5648193359375, 611.3987426757812], "page": 7, "span": [0, 162], "__ref_s3_data": null}]}, {"text": "5.4. Quantitative Analysis", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.53438949584961, 556.8223266601562, 170.5989990234375, 567.4805908203125], "page": 7, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "Structure. As shown in Tab. 
2, TableFormer outperforms all SOTA methods across different datasets by a large margin for predicting the table structure from an image. All the more, our model outperforms pre-trained methods. During the evaluation we do not apply any table filtering. We also provide our baseline results on the SynthTabNet dataset. It has been observed that large tables (e.g. tables that occupy half of the page or more) yield poor predictions. We attribute this issue to the image resizing during the preprocessing step, that produces downsampled images with indistinguishable features. This problem can be addressed by treating such big tables with a separate model which accepts a large input image size.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.37880325317383, 395.5296630859375, 286.5152282714844, 549.0050048828125], "page": 7, "span": [0, 723], "__ref_s3_data": null}]}, {"text": "Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN).", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.49094772338867, 177.64779663085938, 286.5975036621094, 200.5946807861328], "page": 7, "span": [0, 101], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"text": "FT: Model was trained on PubTabNet then finetuned.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.64347839355469, 166.7500762939453, 261.7873229980469, 176.285888671875], "page": 7, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "Cell Detection. Like any object detector, our Cell BBox Detector provides bounding boxes that can be improved with post-processing during inference. We make use of the grid-like structure of tables to refine the predictions. A detailed explanation on the post-processing is available in the supplementary material. As shown in Tab. 
3, we evaluate", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.33903503417969, 78.39030456542969, 286.56085205078125, 148.4354248046875], "page": 7, "span": [0, 346], "__ref_s3_data": null}]}, {"text": "7", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.697509765625, 48.960079193115234, 300.1951904296875, 58.69574737548828], "page": 7, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "our Cell BBox Decoder accuracy for cells with a class label of 'content' only using the PASCAL VOC mAP metric for pre-processing and post-processing. Note that we do not have post-processing results for SynthTabNet as images are only provided. To compare the performance of our proposed approach, we've integrated TableFormer's Cell BBox Decoder into EDD architecture. As mentioned previously, the Structure Decoder provides the Cell BBox Decoder with the features needed to predict the bounding box predictions. Therefore, the accuracy of the Structure Decoder directly influences the accuracy of the Cell BBox Decoder . If the Structure Decoder predicts an extra column, this will result in an extra column of predicted bounding boxes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.0060729980469, 564.2849731445312, 545.2793579101562, 717.4856567382812], "page": 7, "span": [0, 737], "__ref_s3_data": null}]}, {"text": "Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [308.1158752441406, 454.10968017578125, 545.1151733398438, 476.3189392089844], "page": 7, "span": [0, 94], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"text": "Cell Content. In this section, we evaluate the entire pipeline of recovering a table with content. Here we put our approach to test by capitalizing on extracting content from the PDF cells rather than decoding from images. 
Tab. 4 shows the TEDs score of HTML code representing the structure of the table along with the content inserted in the data cell and compared with the ground-truth. Our method achieved a 5.3% increase over the state-of-the-art, and commercial solutions. We believe our scores would be higher if the HTML ground-truth matched the extracted PDF cell content. Unfortunately, there are small discrepancies such as spacings around words or special characters with various unicode representations.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.0658874511719, 271.6740417480469, 545.6917724609375, 424.9032897949219], "page": 7, "span": [0, 715], "__ref_s3_data": null}]}, {"text": "Table 4: Results of structure with content retrieved using cell detection on PubTabNet. In all cases the input is PDF documents with cropped tables.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [307.8614807128906, 102.32206726074219, 545.4934692382812, 136.0908660888672], "page": 7, "span": [0, 148], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/5"}, {"text": "a. 
Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [52.30960464477539, 705.2244873046875, 499.8524169921875, 714.3070068359375], "page": 8, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "Japanese language (previously unseen by TableFormer):", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.81178283691406, 689.845703125, 284.3459167480469, 697.7188720703125], "page": 8, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "Example table from FinTabNet:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [304.830810546875, 689.845703125, 431.0911865234375, 697.7188720703125], "page": 8, "span": [0, 29], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"text": "b. Structure predicted by TableFormer, with superimposed matched PDF cell text:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [52.96451187133789, 575.38525390625, 385.93450927734375, 584.3953857421875], "page": 8, "span": [0, 79], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/6"}, {"text": "Text is aligned to match original for ease of viewing", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [380.35894775390625, 492.63519287109375, 549.4217529296875, 500.1993713378906], "page": 8, "span": [0, 53], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/7"}, {"text": "Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). 
Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.346073150634766, 426.3501281738281, 545.1942138671875, 472.07501220703125], "page": 8, "span": [0, 397], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"text": "Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [62.11488342285156, 324.1943359375, 532.6304931640625, 333.92657470703125], "page": 8, "span": [0, 112], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"text": "5.5. Qualitative Analysis", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.59223556518555, 290.7525939941406, 163.79299926757812, 301.4790344238281], "page": 8, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "We showcase several visualizations for the different components of our network on various \"complex\" tables within datasets presented in this work in Fig. 5 and Fig. 6 As it is shown, our model is able to predict bounding boxes for all table cells, even for the empty ones. Additionally, our post-processing techniques can extract the cell content by matching the predicted bounding boxes to the PDF cells based on their overlap and spatial proximity. The left part of Fig. 5 demonstrates also the adaptability of our method to any language, as it can successfully extract Japanese text, although the training set contains only English content. We provide more visualizations including the intermediate steps in the supplementary material. 
Overall these illustrations justify the versatility of our method across a diverse range of table appearances and content type.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.32524490356445, 77.88054656982422, 286.57904052734375, 256.4608459472656], "page": 8, "span": [0, 866], "__ref_s3_data": null}]}, {"text": "6. Future Work & Conclusion", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [308.3092041015625, 290.5433654785156, 460.8484802246094, 302.1797180175781], "page": 8, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "In this paper, we presented TableFormer an end-to-end transformer based approach to predict table structures and bounding boxes of cells from an image. This approach enables us to recreate the table structure, and extract the cell content from PDF or OCR by using bounding boxes. Additionally, it provides the versatility required in real-world scenarios when dealing with various types of PDF documents, and languages. Furthermore, our method outperforms all state-of-the-arts with a wide margin. Finally, we introduce \"SynthTabNet\" a challenging synthetically generated dataset that reinforces missing characteristics from other datasets.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.91571044921875, 138.69407653808594, 545.3740234375, 280.33050537109375], "page": 8, "span": [0, 640], "__ref_s3_data": null}]}, {"text": "References", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [308.3152770996094, 109.15335845947266, 364.5574035644531, 120.16545104980469], "page": 8, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "[1] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 
End-to-", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [313.0798034667969, 78.43290710449219, 545.2075805664062, 98.85089874267578], "page": 8, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "8", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.7445983886719, 48.9600715637207, 300.3660583496094, 58.06891632080078], "page": 8, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "end object detection with transformers. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision - ECCV 2020 , pages 213-229, Cham, 2020. Springer International Publishing. 5", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [68.82711791992188, 674.5938110351562, 286.4485778808594, 716.9931640625], "page": 9, "span": [0, 212], "__ref_s3_data": null}]}, {"text": "[2] Zewen Chi, Heyan Huang, Heng-Da Xu, Houjin Yu, Wanxuan Yin, and Xian-Ling Mao. Complicated table structure recognition. arXiv preprint arXiv:1908.04729 , 2019. 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.31157302856445, 641.926513671875, 286.36334228515625, 672.84130859375], "page": 9, "span": [0, 165], "__ref_s3_data": null}]}, {"text": "[3] Bertrand Couasnon and Aurelie Lemaitre. Recognition of Tables and Forms , pages 647-677. Springer London, London, 2014. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.34408187866211, 608.8453369140625, 286.4925231933594, 639.6375732421875], "page": 9, "span": [0, 125], "__ref_s3_data": null}]}, {"text": "[4] Herv'e D'ejean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), Apr. 2019. http://sac.founderit.com/. 
2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.48258972167969, 564.0968017578125, 286.4743957519531, 606.2783203125], "page": 9, "span": [0, 216], "__ref_s3_data": null}]}, {"text": "[5] Basilios Gatos, Dimitrios Danatsas, Ioannis Pratikakis, and Stavros J Perantonis. Automatic table detection in document images. In International Conference on Pattern Recognition and Image Analysis , pages 609-618. Springer, 2005. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.2690315246582, 520.0095825195312, 286.5681457519531, 562.3704833984375], "page": 9, "span": [0, 236], "__ref_s3_data": null}]}, {"text": "[6] Max Gobel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.13688659667969, 476.3995056152344, 286.6510009765625, 518.47216796875], "page": 9, "span": [0, 193], "__ref_s3_data": null}]}, {"text": "[7] EA Green and M Krishnamoorthy. Recognition of tables using table grammars. procs. In Symposium on Document Analysis and Recognition (SDAIR'95) , pages 261-277. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.40808868408203, 443.2104797363281, 286.702880859375, 474.14404296875], "page": 9, "span": [0, 165], "__ref_s3_data": null}]}, {"text": "[8] Khurram Azeem Hashmi, Alain Pagani, Marcus Liwicki, Didier Stricker, and Muhammad Zeshan Afzal. Castabdetectors: Cascade network for table detection in document images with recursive feature pyramid and switchable atrous convolution. Journal of Imaging , 7(10), 2021. 
1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.161720275878906, 387.5791320800781, 286.3633117675781, 440.8350524902344], "page": 9, "span": [0, 273], "__ref_s3_data": null}]}, {"text": "[9] Kaiming He, Georgia Gkioxari, Piotr Dollar, and Ross Girshick. Mask r-cnn. In Proceedings of the IEEE International Conference on Computer Vision (ICCV) , Oct 2017. 1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [54.05575180053711, 354.5013732910156, 286.4024963378906, 385.7289123535156], "page": 9, "span": [0, 170], "__ref_s3_data": null}]}, {"text": "[10] Yelin He, X. Qi, Jiaquan Ye, Peng Gao, Yihao Chen, Bingcong Li, Xin Tang, and Rong Xiao. Pingan-vcgroup's solution for icdar 2021 competition on scientific table image recognition to latex. ArXiv , abs/2105.01846, 2021. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.697265625, 310.1059875488281, 286.36334228515625, 352.1959533691406], "page": 9, "span": [0, 226], "__ref_s3_data": null}]}, {"text": "[11] Jianying Hu, Ramanujan S Kashi, Daniel P Lopresti, and Gordon Wilfong. Medium-independent table detection. In Document Recognition and Retrieval VII , volume 3967, pages 291-302. International Society for Optics and Photonics, 1999. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.60444641113281, 255.65762329101562, 286.5947265625, 308.168701171875], "page": 9, "span": [0, 239], "__ref_s3_data": null}]}, {"text": "[12] Matthew Hurst. A constraint-based approach to table structure derivation. In Proceedings of the Seventh International Conference on Document Analysis and Recognition - Volume 2 , ICDAR '03, page 911, USA, 2003. IEEE Computer Society. 
2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.61537170410156, 200.55062866210938, 287.1019287109375, 253.0636444091797], "page": 9, "span": [0, 240], "__ref_s3_data": null}]}, {"text": "[13] Thotreingam Kasar, Philippine Barlas, Sebastien Adam, Cl'ement Chatelain, and Thierry Paquet. Learning to detect tables in scanned document images using line information. In 2013 12th International Conference on Document Analysis and Recognition , pages 1185-1189. IEEE, 2013. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.62745666503906, 144.75555419921875, 286.6357421875, 197.969482421875], "page": 9, "span": [0, 283], "__ref_s3_data": null}]}, {"text": "[14] Pratik Kayal, Mrinal Anand, Harsh Desai, and Mayank Singh. Icdar 2021 competition on scientific table image recognition to latex, 2021. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.66755294799805, 111.65099334716797, 286.39990234375, 142.97256469726562], "page": 9, "span": [0, 142], "__ref_s3_data": null}]}, {"text": "[15] Harold W Kuhn. The hungarian method for the assignment problem. Naval research logistics quarterly , 2(1-2):83-97, 1955. 6", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.800445556640625, 79.06361389160156, 286.36163330078125, 109.40152740478516], "page": 9, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "9", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [294.4941101074219, 48.96084976196289, 300.202392578125, 58.268619537353516], "page": 9, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "[16] Girish Kulkarni, Visruth Premraj, Vicente Ordonez, Sagnik Dhar, Siming Li, Yejin Choi, Alexander C. Berg, and Tamara L. Berg. Babytalk: Understanding and generating simple image descriptions. IEEE Transactions on Pattern Analysis and Machine Intelligence , 35(12):2891-2903, 2013. 
4", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.5410461425781, 653.306640625, 545.3473510742188, 717.0559692382812], "page": 9, "span": [0, 287], "__ref_s3_data": null}]}, {"text": "[17] Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou, and Zhoujun Li. Tablebank: A benchmark dataset for table detection and recognition, 2019. 2, 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.45538330078125, 619.676025390625, 545.1134033203125, 650.8286743164062], "page": 9, "span": [0, 156], "__ref_s3_data": null}]}, {"text": "[18] Yiren Li, Zheng Huang, Junchi Yan, Yi Zhou, Fan Ye, and Xianhui Liu. Gfte: Graph-based financial table extraction. In Alberto Del Bimbo, Rita Cucchiara, Stan Sclaroff, Giovanni Maria Farinella, Tao Mei, Marco Bertini, Hugo Jair Escalante, and Roberto Vezzani, editors, Pattern Recognition. ICPR International Workshops and Challenges , pages 644-658, Cham, 2021. Springer International Publishing. 2, 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.3742370605469, 531.7857666015625, 545.3403930664062, 617.9396362304688], "page": 9, "span": [0, 407], "__ref_s3_data": null}]}, {"text": "[19] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter Staar. Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence , 35(17):15137-15145, May 2021. 1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.3236083984375, 465.19134521484375, 545.2802734375, 529.0132446289062], "page": 9, "span": [0, 328], "__ref_s3_data": null}]}, {"text": "[20] Rujiao Long, Wen Wang, Nan Xue, Feiyu Gao, Zhibo Yang, Yongpan Wang, and Gui-Song Xia. Parsing table structures in the wild. In Proceedings of the IEEE/CVF International Conference on Computer Vision , pages 944-952, 2021. 
2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.53900146484375, 420.8304138183594, 545.4502563476562, 463.07977294921875], "page": 9, "span": [0, 229], "__ref_s3_data": null}]}, {"text": "[21] Shubham Singh Paliwal, D Vishwanath, Rohit Rahul, Monika Sharma, and Lovekesh Vig. Tablenet: Deep learning model for end-to-end table detection and tabular data extraction from scanned document images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 128-133. IEEE, 2019. 1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.5403747558594, 354.6231689453125, 545.1134643554688, 419.1334533691406], "page": 9, "span": [0, 315], "__ref_s3_data": null}]}, {"text": "[22] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. Pytorch: An imperative style, high-performance deep learning library. In H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch'e-Buc, E. Fox, and R. Garnett, editors, Advances in Neural Information Processing Systems 32 , pages 8024-8035. Curran Associates, Inc., 2019. 6", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.5554504394531, 233.79345703125, 545.36865234375, 352.73980712890625], "page": 9, "span": [0, 592], "__ref_s3_data": null}]}, {"text": "[23] Devashish Prasad, Ayan Gadpal, Kshitij Kapadni, Manish Visave, and Kavita Sultanpure. Cascadetabnet: An approach for end to end table detection and structure recognition from image-based documents. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops , pages 572-573, 2020. 
1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.3363952636719, 166.97824096679688, 545.676025390625, 231.79393005371094], "page": 9, "span": [0, 322], "__ref_s3_data": null}]}, {"text": "[24] Shah Rukh Qasim, Hassan Mahmood, and Faisal Shafait. Rethinking table recognition using graph neural networks. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 142-147. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.458740234375, 123.00692749023438, 545.4988403320312, 165.11456298828125], "page": 9, "span": [0, 224], "__ref_s3_data": null}]}, {"text": "[25] Hamid Rezatofighi, Nathan Tsoi, JunYoung Gwak, Amir Sadeghian, Ian Reid, and Silvio Savarese. Generalized intersection over union: A metric and a loss for bounding box regression. In Proceedings of the IEEE/CVF Conference on", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.3419189453125, 79.0348892211914, 545.3016357421875, 121.25286865234375], "page": 9, "span": [0, 229], "__ref_s3_data": null}]}, {"text": "Computer Vision and Pattern Recognition , pages 658-666, 2019. 6", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [69.3697280883789, 697.1412353515625, 286.36175537109375, 716.9249267578125], "page": 10, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "[26] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 11621167, 2017. 
1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.832733154296875, 631.0233154296875, 286.4916076660156, 694.9277954101562], "page": 10, "span": [0, 302], "__ref_s3_data": null}]}, {"text": "[27] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR international conference on document analysis and recognition (ICDAR) , volume 1, pages 1162-1167. IEEE, 2017. 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.52198791503906, 564.773193359375, 286.3633728027344, 628.47607421875], "page": 10, "span": [0, 308], "__ref_s3_data": null}]}, {"text": "[28] Faisal Shafait and Ray Smith. Table detection in heterogeneous documents. In Proceedings of the 9th IAPR International Workshop on Document Analysis Systems , pages 6572, 2010. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.72427749633789, 520.7044677734375, 286.36578369140625, 562.277099609375], "page": 10, "span": [0, 183], "__ref_s3_data": null}]}, {"text": "[29] Shoaib Ahmed Siddiqui, Imran Ali Fateh, Syed Tahseen Raza Rizvi, Andreas Dengel, and Sheraz Ahmed. Deeptabstr: Deep learning based table structure recognition. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1403-1409. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.75909423828125, 465.14605712890625, 286.9628601074219, 518.4593505859375], "page": 10, "span": [0, 275], "__ref_s3_data": null}]}, {"text": "[30] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD , KDD '18, pages 774-782, New York, NY, USA, 2018. ACM. 
1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.69480895996094, 410.3855285644531, 286.36334228515625, 463.3351135253906], "page": 10, "span": [0, 251], "__ref_s3_data": null}]}, {"text": "[31] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett, editors, Advances in Neural Information Processing Systems 30 , pages 5998-6008. Curran Associates, Inc., 2017. 5", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.900028228759766, 333.3085632324219, 286.3638916015625, 407.70587158203125], "page": 10, "span": [0, 366], "__ref_s3_data": null}]}, {"text": "[32] Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan. Show and tell: A neural image caption generator. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) , June 2015. 2", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.75345993041992, 289.1075744628906, 286.42437744140625, 331.030029296875], "page": 10, "span": [0, 221], "__ref_s3_data": null}]}, {"text": "[33] Wenyuan Xue, Qingyong Li, and Dacheng Tao. Res2tim: reconstruct syntactic structures from table images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 749-755. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.72990417480469, 244.6327667236328, 286.7159118652344, 286.82855224609375], "page": 10, "span": [0, 217], "__ref_s3_data": null}]}, {"text": "[34] Wenyuan Xue, Baosheng Yu, Wen Wang, Dacheng Tao, and Qingyong Li. Tgrnet: A table graph reconstruction network for table structure recognition. arXiv preprint arXiv:2106.10598 , 2021. 
3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.77635192871094, 200.70655822753906, 286.65618896484375, 242.16712951660156], "page": 10, "span": [0, 190], "__ref_s3_data": null}]}, {"text": "[35] Quanzeng You, Hailin Jin, Zhaowen Wang, Chen Fang, and Jiebo Luo. Image captioning with semantic attention. In Proceedings of the IEEE conference on computer vision and pattern recognition , pages 4651-4659, 2016. 4", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.754791259765625, 156.28453063964844, 286.94097900390625, 197.91392517089844], "page": 10, "span": [0, 220], "__ref_s3_data": null}]}, {"text": "[36] Xinyi Zheng, Doug Burdick, Lucian Popa, Peter Zhong, and Nancy Xin Ru Wang. Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. Winter Conference for Applications in Computer Vision (WACV) , 2021. 2, 3", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.780601501464844, 101.19727325439453, 286.3633728027344, 154.27796936035156], "page": 10, "span": [0, 280], "__ref_s3_data": null}]}, {"text": "[37] Xu Zhong, Elaheh ShafieiBavani, and Antonio Jimeno Yepes. Image-based table recognition: Data, model,", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.777278900146484, 78.89190673828125, 286.36334228515625, 98.63412475585938], "page": 10, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "10", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6300048828125, 48.960445404052734, 302.6481628417969, 58.501861572265625], "page": 10, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "and evaluation. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision ECCV 2020 , pages 564-580, Cham, 2020. Springer International Publishing. 
2, 3, 7", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [326.9217224121094, 674.8087158203125, 545.315673828125, 716.8610229492188], "page": 10, "span": [0, 192], "__ref_s3_data": null}]}, {"text": "[38] Xu Zhong, Jianbin Tang, and Antonio Jimeno Yepes. Publaynet: Largest dataset ever for document layout analysis. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1015-1022, 2019. 1", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.47698974609375, 629.8935546875, 545.8077392578125, 672.1726684570312], "page": 10, "span": [0, 221], "__ref_s3_data": null}]}, {"text": "TableFormer: Table Structure Understanding with Transformers", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [131.88760375976562, 669.9490966796875, 465.37677001953125, 682.1016235351562], "page": 11, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "Supplementary Material", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [219.5913848876953, 656.1969604492188, 375.0426940917969, 669.7401733398438], "page": 11, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "1. Details on the datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [50.08771514892578, 620.0913696289062, 175.96437072753906, 631.563232421875], "page": 11, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "1.1. Data preparation", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.920230865478516, 600.6398315429688, 150.364013671875, 611.6488037109375], "page": 11, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "As a first step of our data preparation process, we have calculated statistics over the datasets across the following dimensions: (1) table size measured in the number of rows and columns, (2) complexity of the table, (3) strictness of the provided HTML structure and (4) completeness (i.e. 
no omitted bounding boxes). A table is considered to be simple if it does not contain row spans or column spans. Additionally, a table has a strict HTML structure if every row has the same number of columns after taking into account any row or column spans. Therefore a strict HTML structure looks always rectangular. However, HTML is a lenient encoding format, i.e. tables with rows of different sizes might still be regarded as correct due to implicit display rules. These implicit rules leave room for ambiguity, which we want to avoid. As such, we prefer to have \"strict\" tables, i.e. tables where every row has exactly the same length.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.31182098388672, 403.1282653808594, 286.73223876953125, 592.9334716796875], "page": 11, "span": [0, 931], "__ref_s3_data": null}]}, {"text": "We have developed a technique that tries to derive a missing bounding box out of its neighbors. As a first step, we use the annotation data to generate the most fine-grained grid that covers the table structure. In case of strict HTML tables, all grid squares are associated with some table cell and in the presence of table spans a cell extends across multiple grid squares. When enough bounding boxes are known for a rectangular table, it is possible to compute the geometrical border lines between the grid rows and columns. Eventually this information is used to generate the missing bounding boxes. Additionally, the existence of unused grid squares indicates that the table rows have unequal number of columns and the overall structure is non-strict. The generation of missing bounding boxes for non-strict HTML tables is ambiguous and therefore quite challenging. Thus, we have decided to simply discard those tables. In case of PubTabNet we have computed missing bounding boxes for 48% of the simple and 69% of the complex tables. 
Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.21672058105469, 163.6554412841797, 286.7598571777344, 401.6038818359375], "page": 11, "span": [0, 1149], "__ref_s3_data": null}]}, {"text": "Figure 7 illustrates the distribution of the tables across different dimensions per dataset.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.46216583251953, 140.3820037841797, 286.3649597167969, 162.59068298339844], "page": 11, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "1.2. Synthetic datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [50.11198425292969, 119.7578125, 153.92388916015625, 130.09898376464844], "page": 11, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "Aiming to train and evaluate our models in a broader spectrum of table data we have synthesized four types of datasets. Each one contains tables with different appear-", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.3624153137207, 77.53544616699219, 286.8395080566406, 111.57047271728516], "page": 11, "span": [0, 167], "__ref_s3_data": null}]}, {"text": "ances in regard to their size, structure, style and content. Every synthetic dataset contains 150k examples, summing up to 600k synthetic examples. All datasets are divided into Train, Test and Val splits (80%, 10%, 10%).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.9020690917969, 584.572265625, 545.1925659179688, 630.37939453125], "page": 11, "span": [0, 221], "__ref_s3_data": null}]}, {"text": "The process of generating a synthetic dataset can be decomposed into the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [307.86920166015625, 559.6962890625, 545.1150512695312, 581.7109375], "page": 11, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "1. 
Prepare styling and content templates: The styling templates have been manually designed and organized into groups of scope specific appearances (e.g. financial data, marketing data, etc.) Additionally, we have prepared curated collections of content templates by extracting the most frequently used terms out of non-synthetic datasets (e.g. PubTabNet, FinTabNet, etc.).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.9952697753906, 475.45721435546875, 545.2145385742188, 557.0147705078125], "page": 11, "span": [0, 373], "__ref_s3_data": null}]}, {"text": "2. Generate table structures: The structure of each synthetic dataset assumes a horizontal table header which potentially spans over multiple rows and a table body that may contain a combination of row spans and column spans. However, spans are not allowed to cross the header - body boundary. The table structure is described by the parameters: Total number of table rows and columns, number of header rows, type of spans (header only spans, row only spans, column only spans, both row and column spans), maximum span size and the ratio of the table area covered by spans.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.81939697265625, 342.9166259765625, 545.316650390625, 472.3519592285156], "page": 11, "span": [0, 573], "__ref_s3_data": null}]}, {"text": "3. Generate content: Based on the dataset theme , a set of suitable content templates is chosen first. Then, this content can be combined with purely random text to produce the synthetic content.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.7553405761719, 294.5247802734375, 545.6655883789062, 340.7125244140625], "page": 11, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "4. Apply styling templates: Depending on the domain of the synthetic dataset, a set of styling templates is first manually selected. 
Then, a style is randomly selected to format the appearance of the synthesized table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.8453674316406, 245.37010192871094, 545.1974487304688, 291.67205810546875], "page": 11, "span": [0, 218], "__ref_s3_data": null}]}, {"text": "5. Render the complete tables: The synthetic table is finally rendered by a web browser engine to generate the bounding boxes for each table cell. A batching technique is utilized to optimize the runtime overhead of the rendering process.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.942626953125, 184.91055297851562, 545.261962890625, 243.14846801757812], "page": 11, "span": [0, 238], "__ref_s3_data": null}]}, {"text": "2. Prediction post-processing for PDF documents", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [307.89263916015625, 145.01368713378906, 545.1087646484375, 170.1573028564453], "page": 11, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "Although TableFormer can predict the table structure and the bounding boxes for tables recognized inside PDF documents, this is not enough when a full reconstruction of the original table is required. This happens mainly due the following reasons:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.06634521484375, 77.58016204833984, 545.1151733398438, 135.6042938232422], "page": 11, "span": [0, 247], "__ref_s3_data": null}]}, {"text": "11", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.63104248046875, 48.96039962768555, 302.5936279296875, 58.58494567871094], "page": 11, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 7: Distribution of the tables across different dimensions per dataset. 
Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.27131271362305, 605.3292236328125, 545.1137084960938, 627.6530151367188], "page": 12, "span": [0, 245], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"text": "\u00b7 TableFormer output does not include the table cell content.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.34596252441406, 560.20703125, 286.3651123046875, 581.5670166015625], "page": 12, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "\u00b7 There are occasional inaccuracies in the predictions of the bounding boxes.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [61.074283599853516, 526.74169921875, 286.97015380859375, 548.9605102539062], "page": 12, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "However, it is possible to mitigate those limitations by combining the TableFormer predictions with the information already present inside a programmatic PDF document. More specifically, PDF documents can be seen as a sequence of PDF cells where each cell is described by its content and bounding box. 
If we are able to associate the PDF cells with the predicted table cells, we can directly link the PDF cell content to the table cell structure and use the PDF bounding boxes to correct misalignments in the predicted table cell bounding boxes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.33855056762695, 396.2931213378906, 286.4153747558594, 513.664306640625], "page": 12, "span": [0, 545], "__ref_s3_data": null}]}, {"text": "Here is a step-by-step description of the prediction postprocessing:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.47560119628906, 371.6419677734375, 286.3649597167969, 393.8466491699219], "page": 12, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "1. Get the minimal grid dimensions - number of rows and columns for the predicted table structure. This represents the most granular grid for the underlying table structure.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.45584487915039, 335.4472351074219, 286.3802185058594, 369.84210205078125], "page": 12, "span": [0, 173], "__ref_s3_data": null}]}, {"text": "2. Generate pair-wise matches between the bounding boxes of the PDF cells and the predicted cells. The Intersection Over Union (IOU) metric is used to evaluate the quality of the matches.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.4388313293457, 287.7532043457031, 286.4390869140625, 333.490234375], "page": 12, "span": [0, 187], "__ref_s3_data": null}]}, {"text": "3. Use a carefully selected IOU threshold to designate the matches as \"good\" ones and \"bad\" ones.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.394752502441406, 263.5272216796875, 286.36492919921875, 285.01922607421875], "page": 12, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "3.a. 
If all IOU scores in a column are below the threshold, discard all predictions (structure and bounding boxes) for that column.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.28644943237305, 227.34722900390625, 286.3651123046875, 261.0640563964844], "page": 12, "span": [0, 131], "__ref_s3_data": null}]}, {"text": "4. Find the best-fitting content alignment for the predicted cells with good IOU per each column. The alignment of the column can be identified by the following formula:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.466670989990234, 190.80416870117188, 286.50482177734375, 224.85504150390625], "page": 12, "span": [0, 169], "__ref_s3_data": null}]}, {"text": "alignment = arg min c { D$_{c}$ } D$_{c}$ = max { x$_{c}$ } - min { x$_{c}$ } (4)", "type": "equation", "name": "Formula", "font": null, "prov": [{"bbox": [110.48045349121094, 137.08892822265625, 286.3623962402344, 169.79971313476562], "page": 12, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "where c is one of { left, centroid, right } and x$_{c}$ is the xcoordinate for the corresponding point.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.242549896240234, 103.07321166992188, 286.36199951171875, 125.2325210571289], "page": 12, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "5. Use the alignment computed in step 4, to compute the median x -coordinate for all table columns and the me-", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.43268966674805, 78.84821319580078, 286.3649597167969, 100.3372802734375], "page": 12, "span": [0, 110], "__ref_s3_data": null}]}, {"text": "dian cell size for all table cells. 
The usage of median during the computations, helps to eliminate outliers caused by occasional column spans which are usually wider than the normal.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [308.05322265625, 536.2962036132812, 545.2343139648438, 581.705810546875], "page": 12, "span": [0, 183], "__ref_s3_data": null}]}, {"text": "6. Snap all cells with bad IOU to their corresponding median x -coordinates and cell sizes.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.280029296875, 512.0361938476562, 545.1903686523438, 533.602294921875], "page": 12, "span": [0, 91], "__ref_s3_data": null}]}, {"text": "7. Generate a new set of pair-wise matches between the corrected bounding boxes and PDF cells. This time use a modified version of the IOU metric, where the area of the intersection between the predicted and PDF cells is divided by the PDF cell area. In case there are multiple matches for the same PDF cell, the prediction with the higher score is preferred. This covers the cases where the PDF cells are smaller than the area of predicted or corrected prediction cells.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.955810546875, 404.08929443359375, 545.283935546875, 509.8694152832031], "page": 12, "span": [0, 471], "__ref_s3_data": null}]}, {"text": "8. In some rare occasions, we have noticed that TableFormer can confuse a single column as two. When the postprocessing steps are applied, this results with two predicted columns pointing to the same PDF column. In such case we must de-duplicate the columns according to highest total column intersection score.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.9190368652344, 332.00836181640625, 545.3485717773438, 401.61907958984375], "page": 12, "span": [0, 311], "__ref_s3_data": null}]}, {"text": "9. Pick up the remaining orphan cells. 
There could be cases, when after applying all the previous post-processing steps, some PDF cells could still remain without any match to predicted cells. However, it is still possible to deduce the correct matching for an orphan PDF cell by mapping its bounding box on the geometry of the grid. This mapping decides if the content of the orphan cell will be appended to an already matched table cell, or a new table cell should be created to match with the orphan.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.7784423828125, 223.864013671875, 545.276611328125, 329.9189758300781], "page": 12, "span": [0, 503], "__ref_s3_data": null}]}, {"text": "9a. Compute the top and bottom boundary of the horizontal band for each grid row (min/max y coordinates per row).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [307.93115234375, 187.8454132080078, 545.3099365234375, 221.8761444091797], "page": 12, "span": [0, 113], "__ref_s3_data": null}]}, {"text": "9b. Intersect the orphan's bounding box with the row bands, and map the cell to the closest grid row.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.345458984375, 163.541015625, 545.1150512695312, 185.8941192626953], "page": 12, "span": [0, 101], "__ref_s3_data": null}]}, {"text": "9c. Compute the left and right boundary of the vertical band for each grid column (min/max x coordinates per column).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.0950622558594, 127.3694076538086, 545.1150512695312, 161.0072479248047], "page": 12, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "9d. 
Intersect the orphan's bounding box with the column bands, and map the cell to the closest grid column.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.24517822265625, 102.65910339355469, 545.114990234375, 124.94414520263672], "page": 12, "span": [0, 107], "__ref_s3_data": null}]}, {"text": "9e. If the table cell under the identified row and column is not empty, extend its content with the content of the or-", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [308.1419677734375, 78.71697998046875, 545.1151733398438, 100.19052124023438], "page": 12, "span": [0, 118], "__ref_s3_data": null}]}, {"text": "12", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6310729980469, 48.96040725708008, 302.5936584472656, 58.750736236572266], "page": 12, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "phan cell.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [49.6470832824707, 707.8850708007812, 88.84658813476562, 717.4002685546875], "page": 13, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "9f. 
Otherwise create a new structural cell and match it wit the orphan cell.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [49.755611419677734, 683.8928833007812, 286.63763427734375, 705.5321044921875], "page": 13, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "Aditional images with examples of TableFormer predictions and post-processing can be found below.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [49.36106491088867, 659.6165161132812, 286.364990234375, 681.5235595703125], "page": 13, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "Figure 8: Example of a table with multi-line header.", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [62.86752700805664, 281.0370788574219, 273.1334228515625, 290.6253662109375], "page": 13, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "13", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6309814453125, 48.960079193115234, 302.59356689453125, 58.455711364746094], "page": 13, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 9: Example of a table with big empty distance between cells.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [308.1914367675781, 464.54010009765625, 545.1151123046875, 486.05615234375], "page": 13, "span": [0, 67], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"text": "Figure 10: Example of a complex table with empty cells.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [311.8465576171875, 102.13106536865234, 541.63232421875, 112.08642578125], "page": 13, "span": [0, 55], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"text": "Figure 11: Simple table with different style and empty cells.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.5418815612793, 
414.36810302734375, 286.3650817871094, 436.19610595703125], "page": 14, "span": [0, 61], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/13"}, {"text": "Figure 12: Simple table predictions and post processing.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [54.174434661865234, 110.72535705566406, 281.85589599609375, 120.68132781982422], "page": 14, "span": [0, 56], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"text": "14", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6309814453125, 48.96007537841797, 302.59356689453125, 58.46809768676758], "page": 14, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 13: Table predictions example on colorful table.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [315.2740478515625, 410.7093200683594, 538.1852416992188, 420.72198486328125], "page": 14, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "Figure 14: Example with multi-line text.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [344.60076904296875, 99.54707336425781, 508.9893493652344, 109.21481323242188], "page": 14, "span": [0, 40], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"text": "Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [308.0424499511719, 118.20308685302734, 545.193115234375, 139.58673095703125], "page": 15, "span": [0, 106], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"text": "Figure 15: Example with triangular table.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [83.66278839111328, 138.1688690185547, 252.24224853515625, 147.89190673828125], "page": 
15, "span": [0, 41], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"text": "15", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6309814453125, 48.9600944519043, 302.59356689453125, 58.5789794921875], "page": 15, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 17: Example of long table. End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [49.661865234375, 262.3688049316406, 545.1138305664062, 284.1699523925781], "page": 16, "span": [0, 153], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"text": "16", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [292.6309814453125, 48.960079193115234, 302.5961608886719, 58.51115036010742], "page": 16, "span": [0, 2], "__ref_s3_data": null}]}], "figures": [{"bounding-box": null, "prov": [{"bbox": [314.3843994140625, 382.2417297363281, 539.5308837890625, 453.7343444824219], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [311.3420715332031, 540.9691162109375, 550.2800903320312, 713.871826171875], "page": 3, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [74.15921783447266, 607.8399658203125, 520.050537109375, 713.440185546875], "page": 5, "span": [0, 212], "__ref_s3_data": null}], "text": "Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. 
The bounding boxes grabs the content from the PDF and inserts it in the structure.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [53.81645202636719, 284.83001708984375, 284.68927001953125, 533.7860717773438], "page": 5, "span": [0, 745], "__ref_s3_data": null}], "text": "Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [50.235816955566406, 604.4113159179688, 302.18707275390625, 687.9998168945312], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [304.36199951171875, 611.2498168945312, 555.037109375, 690.0220947265625], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [51.7950439453125, 348.8772888183594, 211.3548126220703, 411.7484436035156], "page": 8, "span": [0, 397], "__ref_s3_data": null}], "text": "Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). 
Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [382.25286865234375, 349.6600341796875, 542.1312255859375, 410.2227783203125], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [216.797119140625, 349.422607421875, 375.72662353515625, 411.4811706542969], "page": 8, "span": [0, 112], "__ref_s3_data": null}], "text": "Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [52.74595642089844, 644.7669677734375, 544.16552734375, 717.1785888671875], "page": 12, "span": [0, 245], "__ref_s3_data": null}], "text": "Figure 7: Distribution of the tables across different dimensions per dataset. Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [309.5037841796875, 497.8887634277344, 555.8611450195312, 696.404052734375], "page": 13, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 9: Example of a table with big empty distance between cells.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [334.1772155761719, 126.88340759277344, 518.6530151367188, 198.84474182128906], "page": 13, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 10: Example of a complex table with empty cells.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [52.312522888183594, 537.9481201171875, 167.34197998046875, 577.6830444335938], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [51.631805419921875, 448.1211242675781, 
283.3851013183594, 518.947265625], "page": 14, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 11: Simple table with different style and empty cells.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [50.32429885864258, 136.2794647216797, 177.11224365234375, 180.9558563232422], "page": 14, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 12: Simple table predictions and post processing.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [320.02325439453125, 199.1812744140625, 519.2925415039062, 244.82144165039062], "page": 14, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 14: Example with multi-line text.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [54.9669075012207, 543.454345703125, 279.7747802734375, 657.51416015625], "page": 15, "span": [0, 106], "__ref_s3_data": null}], "text": "Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [50.68792724609375, 160.98660278320312, 320.17071533203125, 287.21685791015625], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [353.733642578125, 156.9563751220703, 495.4932861328125, 306.0447692871094], "page": 15, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 15: Example with triangular table.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [66.34233856201172, 295.27490234375, 528.1603393554688, 537.7723999023438], "page": 16, "span": [0, 153], "__ref_s3_data": null}], "text": "Figure 17: Example of long table. 
End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure.", "type": "figure"}], "tables": [{"bounding-box": null, "prov": [{"bbox": [315.6885681152344, 489.5033874511719, 537.0928344726562, 561.0180053710938], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graph's, etc, since they enhance their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, different variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. In this paper, we present a new table-structure identification model. The latter improves the latest end-toend deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDF's directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-english tables. Second, we replace the LSTM decoders with transformer based decoders. 
This upgrade improves significantly the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables.", "type": "table", "#-cols": 3, "#-rows": 2, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [384.03289794921875, 529.1906127929688, 390.0376892089844, 539.321044921875], "spans": [[0, 1]], "text": "3", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [451.9457092285156, 546.5225219726562, 457.95050048828125, 556.6529541015625], "spans": [[0, 2]], "text": "1", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [331.1968078613281, 512.5169067382812, 337.20159912109375, 522.6473388671875], "spans": [[1, 0]], "text": "2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [315.6885681152344, 295.8706359863281, 536.98681640625, 357.77044677734375], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cell with multi-row text and (3) cells with no content. 
Image from PubTabNet evaluation set, filename: 'PMC2944238 004 02'.", "type": "table", "#-cols": 6, "#-rows": 5, "data": [[{"bbox": [318.8807067871094, 345.5291748046875, 323.273193359375, 354.3141174316406], "spans": [[0, 0]], "text": "0", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [347.24871826171875, 345.5291748046875, 351.6412048339844, 354.3141174316406], "spans": [[0, 1], [0, 2]], "text": "1", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [347.24871826171875, 345.5291748046875, 351.6412048339844, 354.3141174316406], "spans": [[0, 1], [0, 2]], "text": "1", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [394.1042175292969, 344.2760009765625, 465.8810119628906, 354.4064025878906], "spans": [[0, 3], [0, 4]], "text": "2 1", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [394.1042175292969, 344.2760009765625, 465.8810119628906, 354.4064025878906], "spans": [[0, 3], [0, 4]], "text": "2 1", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [318.7731628417969, 333.6695556640625, 323.1656494140625, 342.4544982910156], "spans": [[1, 0]], "text": "3", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [347.24871826171875, 333.6695556640625, 351.6412048339844, 342.4544982910156], "spans": [[1, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": 
false, "row-span": [1, 2]}, {"bbox": [366.7010192871094, 332.748779296875, 398.4967041015625, 342.8791809082031], "spans": [[1, 2]], "text": "5 3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [440.95941162109375, 333.6695556640625, 445.3518981933594, 342.4544982910156], "spans": [[1, 3]], "text": "6", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [487.8149108886719, 333.6695556640625, 492.2073974609375, 342.4544982910156], "spans": [[1, 4]], "text": "7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [318.7731628417969, 309.51080322265625, 323.1656494140625, 318.2957458496094], "spans": [[2, 0]], "text": "8", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [347.24871826171875, 321.3704528808594, 351.6412048339844, 330.1553955078125], "spans": [[2, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [394.1042175292969, 321.3704528808594, 402.8883056640625, 330.1553955078125], "spans": [[2, 2]], "text": "10", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [440.95941162109375, 321.3704528808594, 449.4228515625, 330.1553955078125], "spans": [[2, 3]], "text": "11", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [487.8149108886719, 321.3704528808594, 496.5989990234375, 330.1553955078125], "spans": [[2, 4]], "text": "12", "type": 
"body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 5]}], [{"bbox": null, "spans": [[3, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [347.24871826171875, 309.51080322265625, 356.0328063964844, 318.2957458496094], "spans": [[3, 1]], "text": "13", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.1042175292969, 309.51080322265625, 402.8883056640625, 318.2957458496094], "spans": [[3, 2]], "text": "14", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [440.95941162109375, 309.51080322265625, 449.7434997558594, 318.2957458496094], "spans": [[3, 3]], "text": "15", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [487.8149108886719, 309.51080322265625, 496.5989990234375, 318.2957458496094], "spans": [[3, 4]], "text": "16", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [2, 5]}], [{"bbox": null, "spans": [[4, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [347.24871826171875, 298.0903625488281, 356.0328063964844, 
306.87530517578125], "spans": [[4, 1]], "text": "17", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [394.1042175292969, 298.0903625488281, 402.8883056640625, 306.87530517578125], "spans": [[4, 2]], "text": "18", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [440.95941162109375, 298.0903625488281, 449.7434997558594, 306.87530517578125], "spans": [[4, 3]], "text": "19", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [487.8149108886719, 298.0903625488281, 496.5989990234375, 306.87530517578125], "spans": [[4, 4]], "text": "20", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [2, 5]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [309.9828796386719, 636.4157104492188, 542.3903198242188, 719.2901611328125], "page": 4, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1: Both \"Combined-Tabnet\" and \"CombinedTabnet\" are variations of the following: (*) The CombinedTabnet dataset is the processed combination of PubTabNet and Fintabnet. 
(**) The combined dataset is the processed combination of PubTabNet, Fintabnet and TableBank.", "type": "table", "#-cols": 5, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [412.3320007324219, 709.4790649414062, 430.9023132324219, 718.3856201171875], "spans": [[0, 1]], "text": "Tags", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [442.857421875, 709.4790649414062, 464.4463806152344, 718.3856201171875], "spans": [[0, 2]], "text": "Bbox", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [477.78631591796875, 709.4790649414062, 494.9419250488281, 718.3856201171875], "spans": [[0, 3]], "text": "Size", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [508.2818603515625, 709.4790649414062, 536.9143676757812, 718.3856201171875], "spans": [[0, 4]], "text": "Format", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [317.05999755859375, 697.1260986328125, 361.64263916015625, 706.0326538085938], "spans": [[1, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [417.8559875488281, 697.1161499023438, 425.37774658203125, 706.33154296875], "spans": [[1, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [449.89569091796875, 697.1161499023438, 457.4174499511719, 706.33154296875], "spans": [[1, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [476.4010009765625, 697.1260986328125, 496.3262023925781, 706.0326538085938], "spans": [[1, 3]], "text": "509k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [512.6349487304688, 697.1260986328125, 532.5601196289062, 706.0326538085938], "spans": [[1, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [317.05999755859375, 685.1710815429688, 359.4309387207031, 694.07763671875], "spans": [[2, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [417.8559875488281, 685.1611328125, 425.37774658203125, 694.3765258789062], "spans": [[2, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [449.89569091796875, 685.1611328125, 457.4174499511719, 694.3765258789062], "spans": [[2, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [476.4010009765625, 685.1710815429688, 496.3262023925781, 694.07763671875], "spans": [[2, 3]], "text": "112k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [513.4618530273438, 685.1710815429688, 531.7332763671875, 694.07763671875], "spans": [[2, 4]], "text": "PDF", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [317.05999755859375, 673.215087890625, 359.9788818359375, 682.1216430664062], "spans": [[3, 0]], "text": "TableBank", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[417.8559875488281, 673.2051391601562, 425.37774658203125, 682.4205322265625], "spans": [[3, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [450.812255859375, 673.2051391601562, 456.50091552734375, 682.4205322265625], "spans": [[3, 2]], "text": "7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [476.4010009765625, 673.215087890625, 496.3262023925781, 682.1216430664062], "spans": [[3, 3]], "text": "145k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [511.25018310546875, 673.215087890625, 533.9450073242188, 682.1216430664062], "spans": [[3, 4]], "text": "JPEG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [317.05999755859375, 661.2600708007812, 400.3772277832031, 670.1666259765625], "spans": [[4, 0]], "text": "Combined-Tabnet(*)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [417.8559875488281, 661.2501220703125, 425.37774658203125, 670.4655151367188], "spans": [[4, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [449.89569091796875, 661.2501220703125, 457.4174499511719, 670.4655151367188], "spans": [[4, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [476.4010009765625, 661.2600708007812, 496.3262023925781, 670.1666259765625], "spans": [[4, 3]], "text": "400k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [512.6349487304688, 661.2600708007812, 532.5601196289062, 
670.1666259765625], "spans": [[4, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [317.05999755859375, 649.3050537109375, 375.1718444824219, 658.2116088867188], "spans": [[5, 0]], "text": "Combined(**)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [417.8559875488281, 649.2951049804688, 425.37774658203125, 658.510498046875], "spans": [[5, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [449.89569091796875, 649.2951049804688, 457.4174499511719, 658.510498046875], "spans": [[5, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [476.4010009765625, 649.3050537109375, 496.3262023925781, 658.2116088867188], "spans": [[5, 3]], "text": "500k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [512.6349487304688, 649.3050537109375, 532.5601196289062, 658.2116088867188], "spans": [[5, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [317.05999755859375, 637.3500366210938, 369.3935241699219, 646.256591796875], "spans": [[6, 0]], "text": "SynthTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [417.8559875488281, 637.3401489257812, 425.37774658203125, 646.5555419921875], "spans": [[6, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [449.89569091796875, 637.3401489257812, 457.4174499511719, 646.5555419921875], "spans": [[6, 2]], "text": 
"3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [476.4010009765625, 637.35009765625, 496.3262023925781, 646.2566528320312], "spans": [[6, 3]], "text": "600k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [512.6349487304688, 637.35009765625, 532.5601196289062, 646.2566528320312], "spans": [[6, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [53.472068786621094, 210.4700927734375, 282.5771789550781, 382.98480224609375], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN).", "type": "table", "#-cols": 5, "#-rows": 11, "data": [[{"bbox": [78.84300231933594, 362.403076171875, 104.8553466796875, 371.30963134765625], "spans": [[0, 0]], "text": "Model", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [129.33799743652344, 356.42608642578125, 159.21583557128906, 365.3326416015625], "spans": [[0, 1]], "text": "Dataset", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [171.17095947265625, 356.42608642578125, 199.40496826171875, 365.3326416015625], "spans": [[0, 2]], "text": "Simple", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [211.1999969482422, 356.42608642578125, 247.74349975585938, 377.2876281738281], "spans": [[0, 3]], "text": "TEDS Complex", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [264.5404357910156, 
356.42608642578125, 277.27264404296875, 365.3326416015625], "spans": [[0, 4]], "text": "All", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [81.61199951171875, 339.4690856933594, 102.08513641357422, 348.3756408691406], "spans": [[1, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [134.87205505371094, 339.4690856933594, 153.69140625, 348.3756408691406], "spans": [[1, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [176.56553649902344, 339.4690856933594, 194.00009155273438, 348.3756408691406], "spans": [[1, 2]], "text": "91.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [220.82937622070312, 339.4690856933594, 238.26393127441406, 348.3756408691406], "spans": [[1, 3]], "text": "88.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [262.18414306640625, 339.4690856933594, 279.6186828613281, 348.3756408691406], "spans": [[1, 4]], "text": "89.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [82.16500091552734, 327.5130920410156, 101.53230285644531, 336.4196472167969], "spans": [[2, 0]], "text": "GTE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [134.86715698242188, 327.5130920410156, 153.68650817871094, 336.4196472167969], "spans": [[2, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [183.62411499023438, 327.5130920410156, 186.94166564941406, 
336.4196472167969], "spans": [[2, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [227.88795471191406, 327.5130920410156, 231.20550537109375, 336.4196472167969], "spans": [[2, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [259.69854736328125, 327.5130920410156, 282.1144104003906, 336.4196472167969], "spans": [[2, 4]], "text": "93.01", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [66.31500244140625, 314.9600830078125, 117.38329315185547, 323.86663818359375], "spans": [[3, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [134.86766052246094, 314.9600830078125, 153.68701171875, 323.86663818359375], "spans": [[3, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [176.57110595703125, 314.9600830078125, 194.0056610107422, 323.86663818359375], "spans": [[3, 2]], "text": "98.5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [220.83494567871094, 314.9600830078125, 238.26950073242188, 323.86663818359375], "spans": [[3, 3]], "text": "95.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [259.697998046875, 315.0298156738281, 282.1138610839844, 323.9862060546875], "spans": [[3, 4]], "text": "96.75", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [81.61199951171875, 299.76708984375, 102.08513641357422, 308.67364501953125], "spans": [[4, 0]], "text": 
"EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [134.87205505371094, 299.76708984375, 153.69140625, 308.67364501953125], "spans": [[4, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [176.56553649902344, 299.76708984375, 194.00009155273438, 308.67364501953125], "spans": [[4, 2]], "text": "88.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [218.33871459960938, 299.76708984375, 240.7545623779297, 308.67364501953125], "spans": [[4, 3]], "text": "92.08", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [262.1841125488281, 299.76708984375, 279.61865234375, 308.67364501953125], "spans": [[4, 4]], "text": "90.6", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [82.16500091552734, 287.8121032714844, 101.53230285644531, 296.7186584472656], "spans": [[5, 0]], "text": "GTE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [134.86715698242188, 287.8121032714844, 153.68650817871094, 296.7186584472656], "spans": [[5, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [183.62411499023438, 287.8121032714844, 186.94166564941406, 296.7186584472656], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [227.88795471191406, 287.8121032714844, 231.20550537109375, 296.7186584472656], "spans": [[5, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": 
[3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [259.69854736328125, 287.8121032714844, 282.1144104003906, 296.7186584472656], "spans": [[5, 4]], "text": "87.14", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [71.78900146484375, 275.85711669921875, 111.90838623046875, 284.763671875], "spans": [[6, 0]], "text": "GTE (FT)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [134.86221313476562, 275.85711669921875, 153.6815643310547, 284.763671875], "spans": [[6, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [183.62913513183594, 275.85711669921875, 186.94668579101562, 284.763671875], "spans": [[6, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [227.89297485351562, 275.85711669921875, 231.2105255126953, 284.763671875], "spans": [[6, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [259.693603515625, 275.85711669921875, 282.1094665527344, 284.763671875], "spans": [[6, 4]], "text": "91.02", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [66.31500244140625, 263.9021301269531, 117.38329315185547, 272.8086853027344], "spans": [[7, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [134.86766052246094, 263.9021301269531, 153.68701171875, 272.8086853027344], "spans": [[7, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, 
{"bbox": [176.57110595703125, 263.9021301269531, 194.0056610107422, 272.8086853027344], "spans": [[7, 2]], "text": "97.5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [220.83494567871094, 263.9021301269531, 238.26950073242188, 272.8086853027344], "spans": [[7, 3]], "text": "96.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [262.1889953613281, 263.97186279296875, 279.62353515625, 272.9282531738281], "spans": [[7, 4]], "text": "96.8", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [81.61199951171875, 246.59507751464844, 102.08513641357422, 255.5016326904297], "spans": [[8, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [137.91064453125, 246.59507751464844, 150.64285278320312, 255.5016326904297], "spans": [[8, 1]], "text": "TB", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [176.56553649902344, 246.59507751464844, 194.00009155273438, 255.5016326904297], "spans": [[8, 2]], "text": "86.0", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [227.89285278320312, 246.59507751464844, 231.2104034423828, 255.5016326904297], "spans": [[8, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [262.1841125488281, 246.59507751464844, 279.61865234375, 255.5016326904297], "spans": [[8, 4]], "text": "86.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [66.31500244140625, 234.6390838623047, 
117.38329315185547, 243.54563903808594], "spans": [[9, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [137.90625, 234.6390838623047, 150.63845825195312, 243.54563903808594], "spans": [[9, 1]], "text": "TB", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [176.57110595703125, 234.6390838623047, 194.0056610107422, 243.54563903808594], "spans": [[9, 2]], "text": "89.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [227.88845825195312, 234.6390838623047, 231.2060089111328, 243.54563903808594], "spans": [[9, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [262.1889953613281, 234.7088165283203, 279.62353515625, 243.66519165039062], "spans": [[9, 4]], "text": "89.6", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [66.31500244140625, 215.09107971191406, 117.38329315185547, 223.9976348876953], "spans": [[10, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [134.86766052246094, 215.09107971191406, 153.68701171875, 223.9976348876953], "spans": [[10, 1]], "text": "STN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [176.57110595703125, 215.09107971191406, 194.0056610107422, 223.9976348876953], "spans": [[10, 2]], "text": "96.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [220.83494567871094, 215.09107971191406, 238.26950073242188, 
223.9976348876953], "spans": [[10, 3]], "text": "95.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [262.189697265625, 215.09107971191406, 279.6242370605469, 223.9976348876953], "spans": [[10, 4]], "text": "96.7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [308.2708740234375, 487.9087829589844, 533.3538208007812, 544.2454833984375], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing.", "type": "table", "#-cols": 4, "#-rows": 4, "data": [[{"bbox": [339.322998046875, 529.4290771484375, 365.3353576660156, 538.3356323242188], "spans": [[0, 0]], "text": "Model", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [401.04132080078125, 529.4290771484375, 430.9191589355469, 538.3356323242188], "spans": [[0, 1]], "text": "Dataset", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [454.1021423339844, 529.4290771484375, 474.5852355957031, 538.3356323242188], "spans": [[0, 2]], "text": "mAP", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [486.54034423828125, 529.4290771484375, 527.2276000976562, 538.3356323242188], "spans": [[0, 3]], "text": "mAP (PP)", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [327.656005859375, 512.4721069335938, 377.0007629394531, 521.378662109375], "spans": [[1, 0]], "text": "EDD+BBox", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, 
"row-span": [1, 2]}, {"bbox": [393.6980895996094, 512.4721069335938, 438.2807312011719, 521.378662109375], "spans": [[1, 1]], "text": "PubTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [455.6355895996094, 512.4721069335938, 473.07012939453125, 521.378662109375], "spans": [[1, 2]], "text": "79.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [498.1659240722656, 512.4721069335938, 515.6004638671875, 521.378662109375], "spans": [[1, 3]], "text": "82.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [326.7950134277344, 500.5171203613281, 377.8633117675781, 509.4236755371094], "spans": [[2, 0]], "text": "TableFormer", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [393.6938781738281, 500.5171203613281, 438.2765197753906, 509.4236755371094], "spans": [[2, 1]], "text": "PubTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [455.6310119628906, 500.58685302734375, 473.0655517578125, 509.5432434082031], "spans": [[2, 2]], "text": "82.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [498.1712951660156, 500.58685302734375, 515.6058349609375, 509.5432434082031], "spans": [[2, 3]], "text": "86.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [326.7950134277344, 488.5621337890625, 377.8633117675781, 497.46868896484375], "spans": [[3, 0]], "text": "TableFormer", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[389.81842041015625, 488.5621337890625, 442.1519470214844, 497.46868896484375], "spans": [[3, 1]], "text": "SynthTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [455.63134765625, 488.5621337890625, 473.0658874511719, 497.46868896484375], "spans": [[3, 2]], "text": "87.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [505.22515869140625, 488.5621337890625, 508.5426940917969, 497.46868896484375], "spans": [[3, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [332.6026611328125, 148.15008544921875, 520.7051391601562, 251.47610473632812], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 4: Results of structure with content retrieved using cell detection on PubTabNet. 
In all cases the input is PDF documents with cropped tables.", "type": "table", "#-cols": 4, "#-rows": 7, "data": [[{"bbox": [358.010986328125, 230.86007690429688, 384.0233459472656, 239.76663208007812], "spans": [[0, 0]], "text": "Model", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [408.5059814453125, 224.88307189941406, 436.739990234375, 233.7896270751953], "spans": [[0, 1]], "text": "Simple", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [448.6950988769531, 224.88307189941406, 485.0784912109375, 245.74462890625], "spans": [[0, 2]], "text": "TEDS Complex", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [499.3847961425781, 224.88307189941406, 512.1170043945312, 233.7896270751953], "spans": [[0, 3]], "text": "All", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [357.6820068359375, 207.92608642578125, 384.3518981933594, 216.8326416015625], "spans": [[1, 0]], "text": "Tabula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [413.9009704589844, 207.92608642578125, 431.33551025390625, 216.8326416015625], "spans": [[1, 1]], "text": "78.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [458.164794921875, 207.92608642578125, 475.5993347167969, 216.8326416015625], "spans": [[1, 2]], "text": "57.8", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [497.0289001464844, 207.92608642578125, 514.4634399414062, 216.8326416015625], "spans": [[1, 3]], "text": "67.9", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [350.7229919433594, 195.97108459472656, 391.3106384277344, 204.8776397705078], "spans": [[2, 0]], "text": "Traprange", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [413.90582275390625, 195.97108459472656, 431.3403625488281, 204.8776397705078], "spans": [[2, 1]], "text": "60.8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [458.1696472167969, 195.97108459472656, 475.60418701171875, 204.8776397705078], "spans": [[2, 2]], "text": "49.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [497.03375244140625, 195.97108459472656, 514.4683227539062, 204.8776397705078], "spans": [[2, 3]], "text": "55.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [354.135986328125, 184.0150909423828, 387.89923095703125, 192.92164611816406], "spans": [[3, 0]], "text": "Camelot", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [413.901611328125, 184.0150909423828, 431.3361511230469, 192.92164611816406], "spans": [[3, 1]], "text": "80.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [458.1654357910156, 184.0150909423828, 475.5999755859375, 192.92164611816406], "spans": [[3, 2]], "text": "66.0", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [497.029541015625, 184.0150909423828, 514.464111328125, 192.92164611816406], "spans": [[3, 3]], "text": "73.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 
4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [346.5589904785156, 172.06008911132812, 395.475341796875, 180.96664428710938], "spans": [[4, 0]], "text": "Acrobat Pro", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [413.9061584472656, 172.06008911132812, 431.3406982421875, 180.96664428710938], "spans": [[4, 1]], "text": "68.9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [458.16998291015625, 172.06008911132812, 475.6045227050781, 180.96664428710938], "spans": [[4, 2]], "text": "61.8", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [497.0340881347656, 172.06008911132812, 514.4686279296875, 180.96664428710938], "spans": [[4, 3]], "text": "65.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [360.781005859375, 160.10508728027344, 381.254150390625, 169.0116424560547], "spans": [[5, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [413.9015808105469, 160.10508728027344, 431.33612060546875, 169.0116424560547], "spans": [[5, 1]], "text": "91.2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [458.1654052734375, 160.10508728027344, 475.5999450683594, 169.0116424560547], "spans": [[5, 2]], "text": "85.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [497.0295104980469, 160.10508728027344, 514.4640502929688, 169.0116424560547], "spans": [[5, 3]], "text": "88.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, 
"row-span": [5, 6]}], [{"bbox": [345.4830017089844, 148.15008544921875, 396.5513000488281, 157.056640625], "spans": [[6, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [413.9061584472656, 148.15008544921875, 431.3406982421875, 157.056640625], "spans": [[6, 1]], "text": "95.4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [458.16998291015625, 148.15008544921875, 475.6045227050781, 157.056640625], "spans": [[6, 2]], "text": "90.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [497.03399658203125, 148.21981811523438, 514.4685668945312, 157.1761932373047], "spans": [[6, 3]], "text": "93.6", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [53.395973205566406, 498.96612548828125, 298.77838134765625, 573.2565307617188], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 6, "#-rows": 10, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [209.93284606933594, 565.6378784179688, 241.04458618164062, 569.8192749023438], "spans": [[0, 2], [0, 3]], "text": "\u8ad6\u6587\u30d5\u30a1\u30a4\u30eb", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [209.93284606933594, 565.6378784179688, 241.04458618164062, 569.8192749023438], "spans": [[0, 2], [0, 3]], "text": 
"\u8ad6\u6587\u30d5\u30a1\u30a4\u30eb", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [263.764892578125, 565.6378784179688, 284.5058898925781, 569.8192749023438], "spans": [[0, 4], [0, 5]], "text": "\u53c2\u8003\u6587\u732e", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [263.764892578125, 565.6378784179688, 284.5058898925781, 569.8192749023438], "spans": [[0, 4], [0, 5]], "text": "\u53c2\u8003\u6587\u732e", "type": "col_header", "col": 5, "col-header": false, "col-span": [4, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [110.24990844726562, 558.1526489257812, 120.62017822265625, 562.3340454101562], "spans": [[1, 0]], "text": "\u51fa\u5178", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [175.3660888671875, 558.1526489257812, 201.29246520996094, 562.3340454101562], "spans": [[1, 1]], "text": "\u30d5\u30a1\u30a4\u30eb \u6570", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [209.62408447265625, 558.1526489257812, 219.99435424804688, 562.3340454101562], "spans": [[1, 2]], "text": "\u82f1\u8a9e", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [229.19813537597656, 558.1526489257812, 244.75376892089844, 562.3340454101562], "spans": [[1, 3]], "text": "\u65e5\u672c\u8a9e", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [256.11419677734375, 558.1526489257812, 266.4844665527344, 562.3340454101562], "spans": [[1, 4]], "text": "\u82f1\u8a9e", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, 
"row-span": [1, 2]}, {"bbox": [278.38433837890625, 558.1526489257812, 293.9399719238281, 562.3340454101562], "spans": [[1, 5]], "text": "\u65e5\u672c\u8a9e", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [55.530521392822266, 551.2162475585938, 162.71310424804688, 555.5741577148438], "spans": [[2, 0]], "text": "Association for Computational Linguistics(ACL2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [184.39730834960938, 551.2162475585938, 189.56455993652344, 555.5741577148438], "spans": [[2, 1]], "text": "65", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [208.99026489257812, 551.2162475585938, 214.1575164794922, 555.5741577148438], "spans": [[2, 2]], "text": "65", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [234.8751678466797, 551.2162475585938, 237.4583282470703, 555.5741577148438], "spans": [[2, 3]], "text": "0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [256.88446044921875, 551.2162475585938, 264.63580322265625, 555.5741577148438], "spans": [[2, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [284.06134033203125, 551.2162475585938, 286.6445007324219, 555.5741577148438], "spans": [[2, 5]], "text": "0", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [55.530521392822266, 545.0216064453125, 139.7225341796875, 549.3795166015625], "spans": [[3, 0]], "text": "Computational Linguistics(COLING2002)", "type": "row_header", "col": 0, "col-header": false, 
"col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [183.10536193847656, 545.0216064453125, 190.85670471191406, 549.3795166015625], "spans": [[3, 1]], "text": "140", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [207.6983184814453, 545.0216064453125, 215.4496612548828, 549.3795166015625], "spans": [[3, 2]], "text": "140", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [234.8751678466797, 545.0216064453125, 237.4583282470703, 549.3795166015625], "spans": [[3, 3]], "text": "0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [256.88446044921875, 545.0216064453125, 264.63580322265625, 549.3795166015625], "spans": [[3, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [284.06134033203125, 545.0216064453125, 286.6445007324219, 549.3795166015625], "spans": [[3, 5]], "text": "0", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [55.530521392822266, 538.0201416015625, 128.96026611328125, 542.4105834960938], "spans": [[4, 0]], "text": "\u96fb\u6c17\u60c5\u5831\u901a\u4fe1\u5b66\u4f1a 2003 \u5e74\u7dcf\u5408\u5927\u4f1a", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [183.10536193847656, 538.8270263671875, 190.85670471191406, 543.1849365234375], "spans": [[4, 1]], "text": "150", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [210.2822265625, 538.8270263671875, 212.86538696289062, 543.1849365234375], "spans": [[4, 2]], "text": "8", "type": "body", "col": 2, 
"col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [232.29153442382812, 538.8270263671875, 240.04287719726562, 543.1849365234375], "spans": [[4, 3]], "text": "142", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [256.88446044921875, 538.8270263671875, 264.63580322265625, 543.1849365234375], "spans": [[4, 4]], "text": "223", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [281.4774169921875, 538.8270263671875, 289.228759765625, 543.1849365234375], "spans": [[4, 5]], "text": "147", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [55.530521392822266, 530.534912109375, 129.88177490234375, 534.9253540039062], "spans": [[5, 0]], "text": "\u60c5\u5831\u51e6\u7406\u5b66\u4f1a\u7b2c 65 \u56de\u5168\u56fd\u5927\u4f1a (2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [183.10536193847656, 531.341796875, 190.85670471191406, 535.69970703125], "spans": [[5, 1]], "text": "177", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [210.2822265625, 531.341796875, 212.86538696289062, 535.69970703125], "spans": [[5, 2]], "text": "1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [232.29153442382812, 531.341796875, 240.04287719726562, 535.69970703125], "spans": [[5, 3]], "text": "176", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [256.88446044921875, 531.341796875, 264.63580322265625, 535.69970703125], "spans": [[5, 4]], "text": "150", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [281.4774169921875, 531.341796875, 289.228759765625, 535.69970703125], "spans": [[5, 5]], "text": "236", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [55.530521392822266, 523.3078002929688, 129.88177490234375, 527.6982421875], "spans": [[6, 0]], "text": "\u7b2c 17 \u56de\u4eba\u5de5\u77e5\u80fd\u5b66\u4f1a\u5168\u56fd\u5927\u4f1a (2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [183.10536193847656, 524.1146850585938, 190.85670471191406, 528.4725952148438], "spans": [[6, 1]], "text": "208", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [210.2822265625, 524.1146850585938, 212.86538696289062, 528.4725952148438], "spans": [[6, 2]], "text": "5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [232.29153442382812, 524.1146850585938, 240.04287719726562, 528.4725952148438], "spans": [[6, 3]], "text": "203", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [256.88446044921875, 524.1146850585938, 264.63580322265625, 528.4725952148438], "spans": [[6, 4]], "text": "152", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [281.4774169921875, 524.1146850585938, 289.228759765625, 528.4725952148438], "spans": [[6, 5]], "text": "244", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [55.530521392822266, 516.0807495117188, 127.32453918457031, 520.47119140625], "spans": [[7, 0]], "text": 
"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u7814\u7a76\u4f1a\u7b2c 146 \u301c 155 \u56de", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [184.39730834960938, 516.8876342773438, 189.56455993652344, 521.2455444335938], "spans": [[7, 1]], "text": "98", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [210.2822265625, 516.8876342773438, 212.86538696289062, 521.2455444335938], "spans": [[7, 2]], "text": "2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [233.58348083496094, 516.8876342773438, 238.750732421875, 521.2455444335938], "spans": [[7, 3]], "text": "96", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [256.88446044921875, 516.8876342773438, 264.63580322265625, 521.2455444335938], "spans": [[7, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [281.4774169921875, 516.8876342773438, 289.228759765625, 521.2455444335938], "spans": [[7, 5]], "text": "232", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [55.530521392822266, 508.59564208984375, 110.16829681396484, 512.986083984375], "spans": [[8, 0]], "text": "WWW \u304b\u3089\u53ce\u96c6\u3057\u305f\u8ad6\u6587", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [183.10536193847656, 509.6605224609375, 190.85670471191406, 514.0184326171875], "spans": [[8, 1]], "text": "107", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [208.99026489257812, 509.6605224609375, 
214.1575164794922, 514.0184326171875], "spans": [[8, 2]], "text": "73", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [233.58348083496094, 509.6605224609375, 238.750732421875, 514.0184326171875], "spans": [[8, 3]], "text": "34", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [256.88446044921875, 509.6605224609375, 264.63580322265625, 514.0184326171875], "spans": [[8, 4]], "text": "147", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [282.7693786621094, 509.6605224609375, 287.9366149902344, 514.0184326171875], "spans": [[8, 5]], "text": "96", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": null, "spans": [[9, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [183.10536193847656, 502.1754150390625, 190.85670471191406, 506.5333251953125], "spans": [[9, 1]], "text": "945", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [207.6983184814453, 502.1754150390625, 215.4496612548828, 506.5333251953125], "spans": [[9, 2]], "text": "294", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [232.29153442382812, 502.1754150390625, 240.04287719726562, 506.5333251953125], "spans": [[9, 3]], "text": "651", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [255.7650604248047, 502.1754150390625, 265.7520446777344, 506.5333251953125], "spans": [[9, 4]], "text": "1122", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], 
"row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [281.4774169921875, 502.1754150390625, 289.228759765625, 506.5333251953125], "spans": [[9, 5]], "text": "955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [304.5496826171875, 504.4200439453125, 550.3656005859375, 573.4367065429688], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "Text is aligned to match original for ease of viewing", "type": "table", "#-cols": 5, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [392.0967102050781, 565.3603515625, 438.0144958496094, 570.425537109375], "spans": [[0, 1], [0, 2]], "text": "Shares (in millions)", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [392.0967102050781, 565.3603515625, 438.0144958496094, 570.425537109375], "spans": [[0, 1], [0, 2]], "text": "Shares (in millions)", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [459.0486145019531, 559.1006469726562, 542.0001831054688, 570.3758544921875], "spans": [[0, 3], [0, 4]], "text": "Weighted Average Grant Date Fair Value", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [459.0486145019531, 559.1006469726562, 542.0001831054688, 570.3758544921875], "spans": [[0, 3], [0, 4]], "text": "Weighted Average Grant Date Fair Value", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 
1, "row-header": false, "row-span": [1, 2]}, {"bbox": [393.24420166015625, 550.1876831054688, 407.3463134765625, 555.2528686523438], "spans": [[1, 1]], "text": "RS U s", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.1832275390625, 550.1876831054688, 440.98779296875, 555.2528686523438], "spans": [[1, 2]], "text": "PSUs", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [468.3825378417969, 550.1876831054688, 482.4846496582031, 555.2528686523438], "spans": [[1, 3]], "text": "RSUs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [516.92578125, 550.1876831054688, 530.7303466796875, 555.2528686523438], "spans": [[1, 4]], "text": "PSUs", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [306.11492919921875, 542.323974609375, 364.65606689453125, 547.38916015625], "spans": [[2, 0]], "text": "Nonvested on Janua ry 1", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.2466125488281, 542.0215454101562, 403.75531005859375, 547.0867309570312], "spans": [[2, 1]], "text": "1. 
1", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.8183898925781, 542.0215454101562, 437.32708740234375, 547.0867309570312], "spans": [[2, 2]], "text": "0.3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [465.5285949707031, 542.0215454101562, 483.5500183105469, 547.0867309570312], "spans": [[2, 3]], "text": "90.10 $", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [513.4482421875, 542.0215454101562, 531.4696655273438, 547.0867309570312], "spans": [[2, 4]], "text": "$ 91.19", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [306.11492919921875, 533.2503051757812, 325.6267395019531, 538.3154907226562], "spans": [[3, 0]], "text": "Granted", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [396.2466125488281, 533.2503051757812, 403.75531005859375, 538.3154907226562], "spans": [[3, 1]], "text": "0. 
5", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.8183898925781, 533.2503051757812, 437.32708740234375, 538.3154907226562], "spans": [[3, 2]], "text": "0.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [466.435791015625, 533.2503051757812, 482.5483093261719, 538.3154907226562], "spans": [[3, 3]], "text": "117.44", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [514.2906494140625, 533.2503051757812, 530.809814453125, 538.3154907226562], "spans": [[3, 4]], "text": "122.41", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [306.11492919921875, 525.3865966796875, 322.628662109375, 530.4517822265625], "spans": [[4, 0]], "text": "Vested", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [394.4322204589844, 525.3865966796875, 405.5362548828125, 530.4517822265625], "spans": [[4, 1]], "text": "(0. 
5 )", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [427.70159912109375, 525.3865966796875, 438.8056335449219, 530.4517822265625], "spans": [[4, 2]], "text": "(0.1)", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [468.5553283691406, 525.3865966796875, 482.0704345703125, 530.4517822265625], "spans": [[4, 3]], "text": "87.08", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [516.0186157226562, 525.3865966796875, 529.5337524414062, 530.4517822265625], "spans": [[4, 4]], "text": "81.14", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [306.11492919921875, 517.2933349609375, 356.2477111816406, 522.3585205078125], "spans": [[5, 0]], "text": "Canceled or forfeited", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.4322204589844, 516.6153564453125, 405.5362548828125, 521.6805419921875], "spans": [[5, 1]], "text": "(0. 
1 )", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [431.02801513671875, 516.6153564453125, 436.4280090332031, 521.6805419921875], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [465.83099365234375, 516.6153564453125, 482.3501281738281, 521.6805419921875], "spans": [[5, 3]], "text": "102.01", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [516.0186157226562, 516.6153564453125, 529.5337524414062, 521.6805419921875], "spans": [[5, 4]], "text": "92.18", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [306.11492919921875, 508.4490661621094, 373.3576354980469, 513.5142822265625], "spans": [[6, 0]], "text": "Nonvested on December 31", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [396.2466125488281, 508.4490661621094, 403.75531005859375, 513.5142822265625], "spans": [[6, 1]], "text": "1.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [429.5159912109375, 508.4490661621094, 437.0246887207031, 513.5142822265625], "spans": [[6, 2]], "text": "0.3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [463.7142028808594, 508.4490661621094, 484.7396545410156, 513.5142822265625], "spans": [[6, 3]], "text": "104.85 $", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [512.99462890625, 508.4490661621094, 534.0200805664062, 513.5142822265625], "spans": [[6, 4]], "text": "$ 104.51", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2203.01017v2.pdf", "filename-prov": null, "document-hash": "4fa8dff93d74a84069210c84a38d14d62a39ec8f4e4c90bf955fdebdebcf6636", "#-pages": 16, "collection-name": null, "description": null, "page-hashes": [{"hash": "5deca8f7af439d2d968a480d07761ace8f704461e79d8b3d1dce2c394acdeab7", "model": "default", "page": 1}, {"hash": "81bd44713b62df481eaab1ac092cbc8b66359e53c7ecd637bb30d2680b1d2692", "model": "default", "page": 2}, {"hash": 
"95b5360d00f9fbcd6d5c5caa4529279e7f31219fd75e4495a349a1897700a2fe", "model": "default", "page": 3}, {"hash": "6d4e2424458b083b36c5559a7fe1a42175b082247c516ca8fef9f0d46e6f0bbc", "model": "default", "page": 4}, {"hash": "50115d582a0897fe1dd520a6876ec3f9321690ed0f6cfdc99a8d09019be073e8", "model": "default", "page": 5}, {"hash": "7d6c3a5e249a7f5de56f840b4ea97322f82ca158f6360d03a04a515a575334ab", "model": "default", "page": 6}, {"hash": "ccc222216b8699749c3cb8165aea097d4534eb5d136b2b41263632b1cfb39c67", "model": "default", "page": 7}, {"hash": "73792a09917cca8042a12d1e86bbd2c3c4ddc52d7a150b51940e5231e643bfb5", "model": "default", "page": 8}, {"hash": "8f623b1d6519eb087acf7a13bbe305f093837ba8d14d17cc1af3d091f98a0622", "model": "default", "page": 9}, {"hash": "bde30f21fc04de83c8bd77c8c61fae7f5f2586beb9f5bf346025d2f819269221", "model": "default", "page": 10}, {"hash": "c95eeaf7e1e6efd5a3d169b8914ffb8cb9e9fb82f8dbbae9e98873c3261df57a", "model": "default", "page": 11}, {"hash": "b0db1f70185308047bcdc86e8a515dab11ea727d6819da16fa24c3c829dc4b1c", "model": "default", "page": 12}, {"hash": "42e3d141f1ce66ee82a1447ce816b5a086f75362e6066155739437b058be8c7b", "model": "default", "page": 13}, {"hash": "41b546ffa2bea0771a5c77de1ca64c766ddc4305dd0316993b34b640b686ee06", "model": "default", "page": 14}, {"hash": "c5f2076bcd18075927d93d81dc83d2a5a0f3fdf1085d2f51e3ad10bdd6ad90bc", "model": "default", "page": 15}, {"hash": "0e6e359322b6c285571833316a3dfee50f7139f0ea088d026e0007cd2a679992", "model": "default", "page": 16}]}, "main-text": [{"prov": [{"bbox": [16.783903121948242, 231.99996948242188, 36.339778900146484, 584.1799926757812], "page": 1, "span": [0, 38], "__ref_s3_data": null}], "text": "arXiv:2203.01017v2 [cs.CV] 11 Mar 2022", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [95.52632904052734, 672.0686645507812, 498.9270935058594, 685.8598022460938], "page": 1, "span": [0, 61], "__ref_s3_data": null}], "text": "TableFormer: Table Structure 
Understanding with Transformers.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [141.79666137695312, 620.6796264648438, 453.0020751953125, 646.2996826171875], "page": 1, "span": [0, 73], "__ref_s3_data": null}], "text": "Ahmed Nassar, Nikolaos Livathinos, Maksym Lysak, Peter Staar IBM Research", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [208.1230010986328, 606.532470703125, 379.310791015625, 616.525390625], "page": 1, "span": [0, 35], "__ref_s3_data": null}], "text": "{ ahn,nli,mly,taa } @zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [145.03118896484375, 565.769287109375, 190.65908813476562, 576.84765625], "page": 1, "span": [0, 8], "__ref_s3_data": null}], "text": "Abstract", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [315.37042236328125, 565.2451782226562, 408.4407043457031, 575.1142578125], "page": 1, "span": [0, 22], "__ref_s3_data": null}], "text": "a. Picture of a table:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [50.111976623535156, 241.30950927734375, 126.94803619384766, 252.81288146972656], "page": 1, "span": [0, 15], "__ref_s3_data": null}], "text": "1. Introduction", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.18265151977539, 78.84822082519531, 286.3650817871094, 232.2576904296875], "page": 1, "span": [0, 712], "__ref_s3_data": null}], "text": "The occurrence of tables in documents is ubiquitous. They often summarise quantitative or factual data, which is cumbersome to describe in verbose text but nevertheless extremely valuable. Unfortunately, this compact representation is often not easy to parse by machines. There are many implicit conventions used to obtain a compact table representation. 
For example, tables often have complex columnand row-headers in order to reduce duplicated cell content. Lines of different shapes and sizes are leveraged to separate content or indicate a tree structure. Additionally, tables can also have empty/missing table-entries or multi-row textual table-entries. Fig. 1 shows a table which presents all these issues.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [48.88529968261719, 277.8124694824219, 286.7518310546875, 551.832275390625], "page": 1, "span": [0, 1320], "__ref_s3_data": null}], "text": "Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graph's, etc, since they enhance their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, different variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. In this paper, we present a new table-structure identification model. The latter improves the latest end-toend deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDF's directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-english tables. Second, we replace the LSTM decoders with transformer based decoders. 
This upgrade improves significantly the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [315.25408935546875, 458.4998779296875, 486.4019470214844, 479.3412780761719], "page": 1, "span": [0, 68], "__ref_s3_data": null}], "text": "b. Red-annotation of bounding boxes, Blue-predictions by TableFormer", "type": "paragraph", "name": "List-item", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"prov": [{"bbox": [315.3083801269531, 362.0699157714844, 491.1912536621094, 372.62310791015625], "page": 1, "span": [0, 38], "__ref_s3_data": null}], "text": "c. Structure predicted by TableFormer:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.8612060546875, 232.7270965576172, 545.1151733398438, 278.37225341796875], "page": 1, "span": [0, 220], "__ref_s3_data": null}], "text": "Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cell with multi-row text and (3) cells with no content. Image from PubTabNet evaluation set, filename: 'PMC2944238 004 02'.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"prov": [{"bbox": [307.8420104980469, 126.95307159423828, 545.50439453125, 208.4013671875], "page": 1, "span": [0, 363], "__ref_s3_data": null}], "text": "Recently, significant progress has been made with vision based approaches to extract tables in documents. For the sake of completeness, the issue of table extraction from documents is typically decomposed into two separate challenges, i.e. 
(1) finding the location of the table(s) on a document-page and (2) finding the structure of a given table in the document.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9032287597656, 78.15805053710938, 545.4091796875, 124.39737701416016], "page": 1, "span": [0, 229], "__ref_s3_data": null}], "text": "The first problem is called table-location and has been previously addressed [30, 38, 19, 21, 23, 26, 8] with stateof-the-art object-detection networks (e.g. YOLO and later on Mask-RCNN [9]). For all practical purposes, it can be", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [295.1210021972656, 48.9600715637207, 300.102294921875, 58.62150192260742], "page": 1, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [49.38566970825195, 695.9300537109375, 286.36505126953125, 717.7666015625], "page": 2, "span": [0, 75], "__ref_s3_data": null}], "text": "considered as a solved problem, given enough ground-truth data to train on.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.321495056152344, 563.8181762695312, 286.3651428222656, 693.599365234375], "page": 2, "span": [0, 626], "__ref_s3_data": null}], "text": "The second problem is called table-structure decomposition. The latter is a long standing problem in the community of document understanding [6, 4, 14]. Contrary to the table-location problem, there are no commonly used approaches that can easily be re-purposed to solve this problem. Lately, a set of new model-architectures has been proposed by the community to address table-structure decomposition [37, 36, 18, 20]. All these models have some weaknesses (see Sec. 2). 
The common denominator here is the reliance on textual features and/or the inability to provide the bounding box of each table-cell in the original image.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.25040817260742, 420.054931640625, 286.4363708496094, 561.5592651367188], "page": 2, "span": [0, 643], "__ref_s3_data": null}], "text": "In this paper, we want to address these weaknesses and present a robust table-structure decomposition algorithm. The design criteria for our model are the following. First, we want our algorithm to be language agnostic. In this way, we can obtain the structure of any table, irregardless of the language. Second, we want our algorithm to leverage as much data as possible from the original PDF document. For programmatic PDF documents, the text-cells can often be extracted much faster and with higher accuracy compared to OCR methods. Last but not least, we want to have a direct link between the table-cell and its bounding box in the image.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.561458587646484, 359.8269958496094, 286.3665771484375, 417.9549255371094], "page": 2, "span": [0, 242], "__ref_s3_data": null}], "text": "To meet the design criteria listed above, we developed a new model called TableFormer and a synthetically generated table structure dataset called SynthTabNet $^{1}$. 
In particular, our contributions in this work can be summarised as follows:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [61.259342193603516, 302.3851318359375, 286.62158203125, 348.7479553222656], "page": 2, "span": [0, 166], "__ref_s3_data": null}], "text": "\u00b7 We propose TableFormer , a transformer based model that predicts tables structure and bounding boxes for the table content simultaneously in an end-to-end approach.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [61.2220458984375, 244.87574768066406, 286.3648986816406, 291.1054992675781], "page": 2, "span": [0, 181], "__ref_s3_data": null}], "text": "\u00b7 Across all benchmark datasets TableFormer significantly outperforms existing state-of-the-art metrics, while being much more efficient in training and inference to existing works.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [61.282081604003906, 199.1899871826172, 286.36492919921875, 233.3485107421875], "page": 2, "span": [0, 106], "__ref_s3_data": null}], "text": "\u00b7 We present SynthTabNet a synthetically generated dataset, with various appearance styles and complexity.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [61.136634826660156, 153.24447631835938, 286.3650817871094, 187.8387451171875], "page": 2, "span": [0, 131], "__ref_s3_data": null}], "text": "\u00b7 An augmented dataset based on PubTabNet [37], FinTabNet [36], and TableBank [17] with generated ground-truth for reproducibility.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.235233306884766, 96.42024230957031, 286.66033935546875, 142.322998046875], "page": 2, "span": [0, 231], "__ref_s3_data": null}], "text": "The paper is structured as follows. In Sec. 2, we give a brief overview of the current state-of-the-art. In Sec. 3, we describe the datasets on which we train. In Sec. 
4, we introduce the TableFormer model-architecture and describe", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [60.97100067138672, 78.53706359863281, 183.7305450439453, 87.67019653320312], "page": 2, "span": [0, 40], "__ref_s3_data": null}], "text": "$^{1}$https://github.com/IBM/SynthTabNet", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [294.6210632324219, 48.96015548706055, 300.1224365234375, 58.64695739746094], "page": 2, "span": [0, 1], "__ref_s3_data": null}], "text": "2", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [308.2301025390625, 683.5604248046875, 545.4613037109375, 717.81787109375], "page": 2, "span": [0, 166], "__ref_s3_data": null}], "text": "its results & performance in Sec. 5. As a conclusion, we describe how this new model-architecture can be re-purposed for other tasks in the computer-vision community.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9626770019531, 659.5203247070312, 498.43707275390625, 671.0046997070312], "page": 2, "span": [0, 37], "__ref_s3_data": null}], "text": "2. Previous work and State of the Art", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [307.70526123046875, 461.54498291015625, 545.4183959960938, 651.0844116210938], "page": 2, "span": [0, 901], "__ref_s3_data": null}], "text": "Identifying the structure of a table has been an outstanding problem in the document-parsing community, that motivates many organised public challenges [6, 4, 14]. The difficulty of the problem can be attributed to a number of factors. First, there is a large variety in the shapes and sizes of tables. Such large variety requires a flexible method. This is especially true for complex column- and row headers, which can be extremely intricate and demanding. A second factor of complexity is the lack of data with regard to table-structure. 
Until the publication of PubTabNet [37], there were no large datasets (i.e. > 100 K tables) that provided structure information. This happens primarily due to the fact that tables are notoriously time-consuming to annotate by hand. However, this has definitely changed in recent years with the deliverance of PubTabNet [37], FinTabNet [36], TableBank [17] etc.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.6594543457031, 341.9270935058594, 545.2876586914062, 459.7470397949219], "page": 2, "span": [0, 552], "__ref_s3_data": null}], "text": "Before the rising popularity of deep neural networks, the community relied heavily on heuristic and/or statistical methods to do table structure identification [3, 7, 11, 5, 13, 28]. Although such methods work well on constrained tables [12], a more data-driven approach can be applied due to the advent of convolutional neural networks (CNNs) and the availability of large datasets. To the best-of-our knowledge, there are currently two different types of network architecture that are being pursued for state-of-the-art tablestructure identification.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.7598571777344, 78.7093505859375, 545.48876953125, 340.0845947265625], "page": 2, "span": [0, 1262], "__ref_s3_data": null}], "text": "Image-to-Text networks : In this type of network, one predicts a sequence of tokens starting from an encoded image. Such sequences of tokens can be HTML table tags [37, 17] or LaTeX symbols[10]. The choice of symbols is ultimately not very important, since one can be transformed into the other. There are however subtle variations in the Image-to-Text networks. The easiest network architectures are \"image-encoder \u2192 text-decoder\" (IETD), similar to network architectures that try to provide captions to images [32]. In these IETD networks, one expects as output the LaTeX/HTML string of the entire table, i.e. 
the symbols necessary for creating the table with the content of the table. Another approach is the \"image-encoder \u2192 dual decoder\" (IEDD) networks. In these type of networks, one has two consecutive decoders with different purposes. The first decoder is the tag-decoder , i.e. it only produces the HTML/LaTeX tags which construct an empty table. The second content-decoder uses the encoding of the image in combination with the output encoding of each cell-tag (from the tag-decoder ) to generate the textual content of each table cell. The network architecture of IEDD is certainly more elaborate, but it has the advantage that one can pre-train the", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.48567581176758, 707.8850708007812, 250.15101623535156, 717.7049560546875], "page": 3, "span": [0, 51], "__ref_s3_data": null}], "text": "tag-decoder which is constrained to the table-tags.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.11482238769531, 515.9454345703125, 286.3651428222656, 706.0012817382812], "page": 3, "span": [0, 864], "__ref_s3_data": null}], "text": "In practice, both network architectures (IETD and IEDD) require an implicit, custom trained object-characterrecognition (OCR) to obtain the content of the table-cells. In the case of IETD, this OCR engine is implicit in the decoder similar to [24]. For the IEDD, the OCR is solely embedded in the content-decoder. This reliance on a custom, implicit OCR decoder is of course problematic. OCR is a well known and extremely tough problem, that often needs custom training for each individual language. However, the limited availability for non-english content in the current datasets, makes it impractical to apply the IETD and IEDD methods on tables with other languages. Additionally, OCR can be completely omitted if the tables originate from programmatic PDF documents with known positions of each cell. 
The latter was the inspiration for the work of this paper.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.13818359375, 301.297119140625, 286.5478210449219, 514.357421875], "page": 3, "span": [0, 1007], "__ref_s3_data": null}], "text": "Graph Neural networks : Graph Neural networks (GNN's) take a radically different approach to tablestructure extraction. Note that one table cell can constitute out of multiple text-cells. To obtain the table-structure, one creates an initial graph, where each of the text-cells becomes a node in the graph similar to [33, 34, 2]. Each node is then associated with en embedding vector coming from the encoded image, its coordinates and the encoded text. Furthermore, nodes that represent adjacent text-cells are linked. Graph Convolutional Networks (GCN's) based methods take the image as an input, but also the position of the text-cells and their content [18]. The purpose of a GCN is to transform the input graph into a new graph, which replaces the old links with new ones. The new links then represent the table-structure. With this approach, one can avoid the need to build custom OCR decoders. However, the quality of the reconstructed structure is not comparable to the current state-of-the-art [18].", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.34700393676758, 169.18182373046875, 286.6784362792969, 299.23980712890625], "page": 3, "span": [0, 619], "__ref_s3_data": null}], "text": "Hybrid Deep Learning-Rule-Based approach : A popular current model for table-structure identification is the use of a hybrid Deep Learning-Rule-Based approach similar to [27, 29]. In this approach, one first detects the position of the table-cells with object detection (e.g. YoloVx or MaskRCNN), then classifies the table into different types (from its images) and finally uses different rule-sets to obtain its table-structure. 
Currently, this approach achieves stateof-the-art results, but is not an end-to-end deep-learning method. As such, new rules need to be written if different types of tables are encountered.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.34483337402344, 145.30743408203125, 105.30262756347656, 156.634765625], "page": 3, "span": [0, 11], "__ref_s3_data": null}], "text": "3. Datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.3426628112793, 78.83715057373047, 286.368896484375, 136.55197143554688], "page": 3, "span": [0, 281], "__ref_s3_data": null}], "text": "We rely on large-scale datasets such as PubTabNet [37], FinTabNet [36], and TableBank [17] datasets to train and evaluate our models. These datasets span over various appearance styles and content. We also introduce our own synthetically generated SynthTabNet dataset to fix an im-", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [294.4361877441406, 48.96023941040039, 300.102294921875, 58.61145782470703], "page": 3, "span": [0, 1], "__ref_s3_data": null}], "text": "3", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [308.0231628417969, 503.3020935058594, 545.1151123046875, 524.915283203125], "page": 3, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"prov": [{"bbox": [307.9623718261719, 465.4779968261719, 437.27001953125, 475.4662170410156], "page": 3, "span": [0, 33], "__ref_s3_data": null}], "text": "balance in the previous datasets.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9255065917969, 164.26060485839844, 545.6851196289062, 461.7036437988281], "page": 3, "span": [0, 1400], "__ref_s3_data": null}], "text": "The 
PubTabNet dataset contains 509k tables delivered as annotated PNG images. The annotations consist of the table structure represented in HTML format, the tokenized text and its bounding boxes per table cell. Fig. 1 shows the appearance style of PubTabNet. Depending on its complexity, a table is characterized as \"simple\" when it does not contain row spans or column spans, otherwise it is \"complex\". The dataset is divided into Train and Val splits (roughly 98% and 2%). The Train split consists of 54% simple and 46% complex tables and the Val split of 51% and 49% respectively. The FinTabNet dataset contains 112k tables delivered as single-page PDF documents with mixed table structures and text content. Similarly to the PubTabNet, the annotations of FinTabNet include the table structure in HTML, the tokenized text and the bounding boxes on a table cell basis. The dataset is divided into Train, Test and Val splits (81%, 9.5%, 9.5%), and each one is almost equally divided into simple and complex tables (Train: 48% simple, 52% complex, Test: 48% simple, 52% complex, Test: 53% simple, 47% complex). Finally the TableBank dataset consists of 145k tables provided as JPEG images. The latter has annotations for the table structure, but only few with bounding boxes of the table cells. The entire dataset consists of simple tables and it is divided into 90% Train, 3% Test and 7% Val splits.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.7597351074219, 78.84823608398438, 545.282958984375, 160.3302001953125], "page": 3, "span": [0, 406], "__ref_s3_data": null}], "text": "Due to the heterogeneity across the dataset formats, it was necessary to combine all available data into one homogenized dataset before we could train our models for practical purposes. Given the size of PubTabNet, we adopted its annotation format and we extracted and converted all tables as PNG images with a resolution of 72 dpi. 
Additionally, we have filtered out tables with extreme sizes due to small", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.14799118041992, 695.9300537109375, 286.3651123046875, 717.7029418945312], "page": 4, "span": [0, 93], "__ref_s3_data": null}], "text": "amount of such tables, and kept only those ones ranging between 1*1 and 20*10 (rows/columns).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.21862030029297, 478.8949279785156, 286.5638732910156, 691.9474487304688], "page": 4, "span": [0, 983], "__ref_s3_data": null}], "text": "The availability of the bounding boxes for all table cells is essential to train our models. In order to distinguish between empty and non-empty bounding boxes, we have introduced a binary class in the annotation. Unfortunately, the original datasets either omit the bounding boxes for whole tables (e.g. TableBank) or they narrow their scope only to non-empty cells. Therefore, it was imperative to introduce a data pre-processing procedure that generates the missing bounding boxes out of the annotation information. This procedure first parses the provided table structure and calculates the dimensions of the most fine-grained grid that covers the table structure. Notice that each table cell may occupy multiple grid squares due to row or column spans. In case of PubTabNet we had to compute missing bounding boxes for 48% of the simple and 69% of the complex tables. Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.344085693359375, 356.89642333984375, 286.75341796875, 474.9582214355469], "page": 4, "span": [0, 571], "__ref_s3_data": null}], "text": "As it is illustrated in Fig. 2, the table distributions from all datasets are skewed towards simpler structures with fewer number of rows/columns. 
Additionally, there is very limited variance in the table styles, which in case of PubTabNet and FinTabNet means one styling format for the majority of the tables. Similar limitations appear also in the type of table content, which in some cases (e.g. FinTabNet) is restricted to a certain domain. Ultimately, the lack of diversity in the training dataset damages the ability of the models to generalize well on unseen data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.167423248291016, 164.30380249023438, 286.73486328125, 353.8109436035156], "page": 4, "span": [0, 941], "__ref_s3_data": null}], "text": "Motivated by those observations we aimed at generating a synthetic table dataset named SynthTabNet . This approach offers control over: 1) the size of the dataset, 2) the table structure, 3) the table style and 4) the type of content. The complexity of the table structure is described by the size of the table header and the table body, as well as the percentage of the table cells covered by row spans and column spans. A set of carefully designed styling templates provides the basis to build a wide range of table appearances. Lastly, the table content is generated out of a curated collection of text corpora. By controlling the size and scope of the synthetic datasets we are able to train and evaluate our models in a variety of different conditions. For example, we can first generate a highly diverse dataset to train our models and then evaluate their performance on other synthetic datasets which are focused on a specific domain.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.45711135864258, 78.84810638427734, 286.5352783203125, 160.2691650390625], "page": 4, "span": [0, 405], "__ref_s3_data": null}], "text": "In this regard, we have prepared four synthetic datasets, each one containing 150k examples. 
The corpora to generate the table text consists of the most frequent terms appearing in PubTabNet and FinTabNet together with randomly generated text. The first two synthetic datasets have been fine-tuned to mimic the appearance of the original datasets but encompass more complicated table structures. The third", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [294.497802734375, 48.96018600463867, 300.2393798828125, 58.43728256225586], "page": 4, "span": [0, 1], "__ref_s3_data": null}], "text": "4", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [307.6622009277344, 567.6110229492188, 545.1150512695312, 625.0201416015625], "page": 4, "span": [0, 267], "__ref_s3_data": null}], "text": "Table 1: Both \"Combined-Tabnet\" and \"CombinedTabnet\" are variations of the following: (*) The CombinedTabnet dataset is the processed combination of PubTabNet and Fintabnet. (**) The combined dataset is the processed combination of PubTabNet, Fintabnet and TableBank.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"prov": [{"bbox": [307.8670349121094, 497.6080322265625, 545.1443481445312, 542.9603271484375], "page": 4, "span": [0, 210], "__ref_s3_data": null}], "text": "one adopts a colorful appearance with high contrast and the last one contains tables with sparse content. Lastly, we have combined all synthetic datasets into one big unified synthetic dataset of 600k examples.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [320.144287109375, 485.321044921875, 542.7439575195312, 494.8341979980469], "page": 4, "span": [0, 57], "__ref_s3_data": null}], "text": "Tab. 1 summarizes the various attributes of the datasets.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9104309082031, 460.0683288574219, 444.9360656738281, 471.7930908203125], "page": 4, "span": [0, 24], "__ref_s3_data": null}], "text": "4. 
The TableFormer model", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [307.71002197265625, 344.3941345214844, 545.5623779296875, 451.22589111328125], "page": 4, "span": [0, 504], "__ref_s3_data": null}], "text": "Given the image of a table, TableFormer is able to predict: 1) a sequence of tokens that represent the structure of a table, and 2) a bounding box coupled to a subset of those tokens. The conversion of an image into a sequence of tokens is a well-known task [35, 16]. While attention is often used as an implicit method to associate each token of the sequence with a position in the original image, an explicit association between the individual table-cells and the image bounding boxes is also required.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.70916748046875, 324.45367431640625, 420.16058349609375, 334.8069763183594], "page": 4, "span": [0, 24], "__ref_s3_data": null}], "text": "4.1. Model architecture.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [307.7804870605469, 127.00019073486328, 545.635986328125, 316.6053466796875], "page": 4, "span": [0, 907], "__ref_s3_data": null}], "text": "We now describe in detail the proposed method, which is composed of three main components, see Fig. 4. Our CNN Backbone Network encodes the input as a feature vector of predefined length. The input feature vector of the encoded image is passed to the Structure Decoder to produce a sequence of HTML tags that represent the structure of the table. With each prediction of an HTML standard data cell (' < td > ') the hidden state of that cell is passed to the Cell BBox Decoder. As for spanning cells, such as row or column span, the tag is broken down to ' < ', 'rowspan=' or 'colspan=', with the number of spanning cells (attribute), and ' > '. The hidden state attached to ' < ' is passed to the Cell BBox Decoder. 
A shared feed forward network (FFN) receives the hidden states from the Structure Decoder, to provide the final detection predictions of the bounding box coordinates and their classification.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.7881164550781, 78.76947021484375, 545.50244140625, 124.62178039550781], "page": 4, "span": [0, 223], "__ref_s3_data": null}], "text": "CNN Backbone Network. A ResNet-18 CNN is the backbone that receives the table image and encodes it as a vector of predefined length. The network has been modified by removing the linear and pooling layer, as we are not per-", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.26235580444336, 566.526611328125, 545.1981201171875, 588.8561401367188], "page": 5, "span": [0, 212], "__ref_s3_data": null}], "text": "Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. The bounding boxes grabs the content from the PDF and inserts it in the structure.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"prov": [{"bbox": [49.09820556640625, 111.42295837402344, 286.6905822753906, 265.1697998046875], "page": 5, "span": [0, 745], "__ref_s3_data": null}], "text": "Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. 
Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"prov": [{"bbox": [308.04541015625, 497.69305419921875, 545.3578491210938, 543.414794921875], "page": 5, "span": [0, 227], "__ref_s3_data": null}], "text": "forming classification, and adding an adaptive pooling layer of size 28*28. ResNet by default downsamples the image resolution by 32 and then the encoded image is provided to both the Structure Decoder , and Cell BBox Decoder .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.867919921875, 377.2391662597656, 545.4237060546875, 495.6871337890625], "page": 5, "span": [0, 563], "__ref_s3_data": null}], "text": "Structure Decoder. The transformer architecture of this component is based on the work proposed in [31]. After extensive experimentation, the Structure Decoder is modeled as a transformer encoder with two encoder layers and a transformer decoder made from a stack of 4 decoder layers that comprise mainly of multi-head attention and feed forward layers. This configuration uses fewer layers and heads in comparison to networks applied to other problems (e.g. \"Scene Understanding\", \"Image Captioning\"), something which we relate to the simplicity of table images.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.0282287597656, 246.4272918701172, 545.38037109375, 375.6052551269531], "page": 5, "span": [0, 592], "__ref_s3_data": null}], "text": "The transformer encoder receives an encoded image from the CNN Backbone Network and refines it through a multi-head dot-product attention layer, followed by a Feed Forward Network. 
During training, the transformer decoder receives as input the output feature produced by the transformer encoder, and the tokenized input of the HTML ground-truth tags. Using a stack of multi-head attention layers, different aspects of the tag sequence could be inferred. This is achieved by each attention head on a layer operating in a different subspace, and then combining altogether their attention score.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9319152832031, 138.2592010498047, 545.4405517578125, 244.52415466308594], "page": 5, "span": [0, 483], "__ref_s3_data": null}], "text": "Cell BBox Decoder. Our architecture allows to simultaneously predict HTML tags and bounding boxes for each table cell without the need of a separate object detector end to end. This approach is inspired by DETR [1] which employs a Transformer Encoder, and Decoder that looks for a specific number of object queries (potential object detections). As our model utilizes a transformer architecture, the hidden state of the < td > ' and ' < ' HTML structure tags become the object query.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.91424560546875, 78.50503540039062, 545.3233032226562, 136.1419219970703], "page": 5, "span": [0, 286], "__ref_s3_data": null}], "text": "The encoding generated by the CNN Backbone Network along with the features acquired for every data cell from the Transformer Decoder are then passed to the attention network. The attention network takes both inputs and learns to provide an attention weighted encoding. 
This weighted at-", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [294.6537170410156, 48.96027755737305, 300.10223388671875, 58.62459945678711], "page": 5, "span": [0, 1], "__ref_s3_data": null}], "text": "5", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [49.33885192871094, 635.6671752929688, 286.5264587402344, 717.6875610351562], "page": 6, "span": [0, 380], "__ref_s3_data": null}], "text": "tention encoding is then multiplied to the encoded image to produce a feature for each table cell. Notice that this is different than the typical object detection problem where imbalances between the number of detections and the amount of objects may exist. In our case, we know up front that the produced detections always match with the table cells in number and correspondence.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.410404205322266, 551.5345458984375, 286.57147216796875, 632.9039916992188], "page": 6, "span": [0, 371], "__ref_s3_data": null}], "text": "The output features for each table cell are then fed into the feed-forward network (FFN). The FFN consists of a Multi-Layer Perceptron (3 layers with ReLU activation function) that predicts the normalized coordinates for the bounding box of each table cell. Finally, the predicted bounding boxes are classified based on whether they are empty or not using a linear layer.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.193443298339844, 347.69293212890625, 286.7899475097656, 548.953125], "page": 6, "span": [0, 985], "__ref_s3_data": null}], "text": "Loss Functions. We formulate a multi-task loss Eq. 2 to train our network. The Cross-Entropy loss (denoted as l$_{s}$ ) is used to train the Structure Decoder which predicts the structure tokens. As for the Cell BBox Decoder it is trained with a combination of losses denoted as l$_{box}$ . 
l$_{box}$ consists of the generally used l$_{1}$ loss for object detection and the IoU loss ( l$_{iou}$ ) to be scale invariant as explained in [25]. In comparison to DETR, we do not use the Hungarian algorithm [15] to match the predicted bounding boxes with the ground-truth boxes, as we have already achieved a one-toone match through two steps: 1) Our token input sequence is naturally ordered, therefore the hidden states of the table data cells are also in order when they are provided as input to the Cell BBox Decoder , and 2) Our bounding boxes generation mechanism (see Sec. 3) ensures a one-to-one mapping between the cell content and its bounding box for all post-processed datasets.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.70558166503906, 323.12811279296875, 286.364990234375, 344.9725341796875], "page": 6, "span": [0, 67], "__ref_s3_data": null}], "text": "The loss used to train the TableFormer can be defined as following:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [123.6725845336914, 274.2152404785156, 286.3624267578125, 299.7832336425781], "page": 6, "span": [0, 84], "__ref_s3_data": null}], "text": "l$_{box}$ = \u03bb$_{iou}$l$_{iou}$ + \u03bb$_{l}$$_{1}$ l = \u03bbl$_{s}$ + (1 - \u03bb ) l$_{box}$ (1)", "type": "equation", "name": "Formula", "font": null}, {"prov": [{"bbox": [49.33706283569336, 251.3388671875, 281.596923828125, 261.4623718261719], "page": 6, "span": [0, 76], "__ref_s3_data": null}], "text": "where \u03bb \u2208 [0, 1], and \u03bb$_{iou}$, \u03bb$_{l}$$_{1}$ \u2208$_{R}$ are hyper-parameters.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.722251892089844, 225.24951171875, 172.0424041748047, 236.80613708496094], "page": 6, "span": [0, 23], "__ref_s3_data": null}], "text": "5. 
Experimental Results", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.44451904296875, 205.55343627929688, 179.17501831054688, 216.1990509033203], "page": 6, "span": [0, 27], "__ref_s3_data": null}], "text": "5.1. Implementation Details", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.39978790283203, 150.96865844726562, 286.6137390136719, 196.80064392089844], "page": 6, "span": [0, 207], "__ref_s3_data": null}], "text": "TableFormer uses ResNet-18 as the CNN Backbone Network . The input images are resized to 448*448 pixels and the feature map has a dimension of 28*28. Additionally, we enforce the following input constraints:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [91.03668212890625, 113.60411834716797, 286.3624572753906, 138.33775329589844], "page": 6, "span": [0, 77], "__ref_s3_data": null}], "text": "Image width and height \u2264 1024 pixels Structural tags length \u2264 512 tokens. (2)", "type": "equation", "name": "Formula", "font": null}, {"prov": [{"bbox": [49.08633041381836, 78.79530334472656, 286.4946594238281, 100.13447570800781], "page": 6, "span": [0, 117], "__ref_s3_data": null}], "text": "Although input constraints are used also by other methods, such as EDD, ours are less restrictive due to the improved", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [294.5410461425781, 48.96010971069336, 300.3391418457031, 58.42277908325195], "page": 6, "span": [0, 1], "__ref_s3_data": null}], "text": "6", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [307.90777587890625, 683.57861328125, 545.310302734375, 717.6608276367188], "page": 6, "span": [0, 156], "__ref_s3_data": null}], "text": "runtime performance and lower memory footprint of TableFormer. 
This allows to utilize input samples with longer sequences and images with larger dimensions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.8103332519531, 463.4059143066406, 545.45947265625, 677.0466918945312], "page": 6, "span": [0, 1024], "__ref_s3_data": null}], "text": "The Transformer Encoder consists of two \"Transformer Encoder Layers\", with an input feature size of 512, feed forward network of 1024, and 4 attention heads. As for the Transformer Decoder it is composed of four \"Transformer Decoder Layers\" with similar input and output dimensions as the \"Transformer Encoder Layers\". Even though our model uses fewer layers and heads than the default implementation parameters, our extensive experimentation has proved this setup to be more suitable for table images. We attribute this finding to the inherent design of table images, which contain mostly lines and text, unlike the more elaborate content present in other scopes (e.g. the COCO dataset). Moreover, we have added ResNet blocks to the inputs of the Structure Decoder and Cell BBox Decoder. This prevents a decoder having a stronger influence over the learned weights which would damage the other prediction task (structure vs bounding boxes), but learn task specific weights instead. Lastly our dropout layers are set to 0.5.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.79974365234375, 362.4031982421875, 545.5565185546875, 456.2726745605469], "page": 6, "span": [0, 419], "__ref_s3_data": null}], "text": "For training, TableFormer is trained with 3 Adam optimizers, each one for the CNN Backbone Network , Structure Decoder , and Cell BBox Decoder . Taking the PubTabNet as an example for our parameter set up, the initializing learning rate is 0.001 for 12 epochs with a batch size of 24, and \u03bb set to 0.5. 
Afterwards, we reduce the learning rate to 0.0001, the batch size to 18 and train for 12 more epochs or convergence.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.99951171875, 237.81484985351562, 545.37939453125, 355.8169250488281], "page": 6, "span": [0, 528], "__ref_s3_data": null}], "text": "TableFormer is implemented with PyTorch and Torchvision libraries [22]. To speed up the inference, the image undergoes a single forward pass through the CNN Backbone Network and transformer encoder. This eliminates the overhead of generating the same features for each decoding step. Similarly, we employ a 'caching' technique to preform faster autoregressive decoding. This is achieved by storing the features of decoded tokens so we can reuse them for each time step. Therefore, we only compute the attention for each new tag.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.2174377441406, 202.5936279296875, 397.44281005859375, 212.9311981201172], "page": 6, "span": [0, 19], "__ref_s3_data": null}], "text": "5.2. Generalization", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [308.0255126953125, 119.17848205566406, 545.1151733398438, 189.6819610595703], "page": 6, "span": [0, 299], "__ref_s3_data": null}], "text": "TableFormer is evaluated on three major publicly available datasets of different nature to prove the generalization and effectiveness of our model. The datasets used for evaluation are the PubTabNet, FinTabNet and TableBank which stem from the scientific, financial and general domains respectively.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.2952575683594, 78.51296997070312, 545.2591552734375, 112.20266723632812], "page": 6, "span": [0, 155], "__ref_s3_data": null}], "text": "We also share our baseline results on the challenging SynthTabNet dataset. Throughout our experiments, the same parameters stated in Sec. 
5.1 are utilized.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.5137939453125, 707.74658203125, 167.9048614501953, 718.37353515625], "page": 7, "span": [0, 25], "__ref_s3_data": null}], "text": "5.3. Datasets and Metrics", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.42784881591797, 653.5985107421875, 286.3651123046875, 699.5994873046875], "page": 7, "span": [0, 192], "__ref_s3_data": null}], "text": "The Tree-Edit-Distance-Based Similarity (TEDS) metric was introduced in [37]. It represents the prediction, and ground-truth as a tree structure of HTML tags. This similarity is calculated as:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [85.80982208251953, 618.6690063476562, 286.3623962402344, 642.4047241210938], "page": 7, "span": [0, 99], "__ref_s3_data": null}], "text": "TEDS ( T$_{a}$, T$_{b}$ ) = 1 - EditDist ( T$_{a}$, T$_{b}$ ) max ( | T$_{a}$ | , | T$_{b}$ | ) (3)", "type": "equation", "name": "Formula", "font": null}, {"prov": [{"bbox": [49.60887908935547, 578.02099609375, 286.5648193359375, 611.3987426757812], "page": 7, "span": [0, 162], "__ref_s3_data": null}], "text": "where T$_{a}$ and T$_{b}$ represent tables in tree structure HTML format. EditDist denotes the tree-edit distance, and | T | represents the number of nodes in T .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.53438949584961, 556.8223266601562, 170.5989990234375, 567.4805908203125], "page": 7, "span": [0, 26], "__ref_s3_data": null}], "text": "5.4. Quantitative Analysis", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.37880325317383, 395.5296630859375, 286.5152282714844, 549.0050048828125], "page": 7, "span": [0, 723], "__ref_s3_data": null}], "text": "Structure. As shown in Tab. 
2, TableFormer outperforms all SOTA methods across different datasets by a large margin for predicting the table structure from an image. All the more, our model outperforms pre-trained methods. During the evaluation we do not apply any table filtering. We also provide our baseline results on the SynthTabNet dataset. It has been observed that large tables (e.g. tables that occupy half of the page or more) yield poor predictions. We attribute this issue to the image resizing during the preprocessing step, that produces downsampled images with indistinguishable features. This problem can be addressed by treating such big tables with a separate model which accepts a large input image size.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.49094772338867, 177.64779663085938, 286.5975036621094, 200.5946807861328], "page": 7, "span": [0, 101], "__ref_s3_data": null}], "text": "Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN).", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"prov": [{"bbox": [49.64347839355469, 166.7500762939453, 261.7873229980469, 176.285888671875], "page": 7, "span": [0, 50], "__ref_s3_data": null}], "text": "FT: Model was trained on PubTabNet then finetuned.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.33903503417969, 78.39030456542969, 286.56085205078125, 148.4354248046875], "page": 7, "span": [0, 346], "__ref_s3_data": null}], "text": "Cell Detection. Like any object detector, our Cell BBox Detector provides bounding boxes that can be improved with post-processing during inference. We make use of the grid-like structure of tables to refine the predictions. A detailed explanation on the post-processing is available in the supplementary material. As shown in Tab. 
3, we evaluate", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [294.697509765625, 48.960079193115234, 300.1951904296875, 58.69574737548828], "page": 7, "span": [0, 1], "__ref_s3_data": null}], "text": "7", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [308.0060729980469, 564.2849731445312, 545.2793579101562, 717.4856567382812], "page": 7, "span": [0, 737], "__ref_s3_data": null}], "text": "our Cell BBox Decoder accuracy for cells with a class label of 'content' only using the PASCAL VOC mAP metric for pre-processing and post-processing. Note that we do not have post-processing results for SynthTabNet as images are only provided. To compare the performance of our proposed approach, we've integrated TableFormer's Cell BBox Decoder into EDD architecture. As mentioned previously, the Structure Decoder provides the Cell BBox Decoder with the features needed to predict the bounding box predictions. Therefore, the accuracy of the Structure Decoder directly influences the accuracy of the Cell BBox Decoder . If the Structure Decoder predicts an extra column, this will result in an extra column of predicted bounding boxes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.1158752441406, 454.10968017578125, 545.1151733398438, 476.3189392089844], "page": 7, "span": [0, 94], "__ref_s3_data": null}], "text": "Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"prov": [{"bbox": [308.0658874511719, 271.6740417480469, 545.6917724609375, 424.9032897949219], "page": 7, "span": [0, 715], "__ref_s3_data": null}], "text": "Cell Content. In this section, we evaluate the entire pipeline of recovering a table with content. Here we put our approach to test by capitalizing on extracting content from the PDF cells rather than decoding from images. Tab. 
4 shows the TEDs score of HTML code representing the structure of the table along with the content inserted in the data cell and compared with the ground-truth. Our method achieved a 5.3% increase over the state-of-the-art, and commercial solutions. We believe our scores would be higher if the HTML ground-truth matched the extracted PDF cell content. Unfortunately, there are small discrepancies such as spacings around words or special characters with various unicode representations.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.8614807128906, 102.32206726074219, 545.4934692382812, 136.0908660888672], "page": 7, "span": [0, 148], "__ref_s3_data": null}], "text": "Table 4: Results of structure with content retrieved using cell detection on PubTabNet. In all cases the input is PDF documents with cropped tables.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/5"}, {"prov": [{"bbox": [52.30960464477539, 705.2244873046875, 499.8524169921875, 714.3070068359375], "page": 8, "span": [0, 108], "__ref_s3_data": null}], "text": "a. 
Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.81178283691406, 689.845703125, 284.3459167480469, 697.7188720703125], "page": 8, "span": [0, 53], "__ref_s3_data": null}], "text": "Japanese language (previously unseen by TableFormer):", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [304.830810546875, 689.845703125, 431.0911865234375, 697.7188720703125], "page": 8, "span": [0, 29], "__ref_s3_data": null}], "text": "Example table from FinTabNet:", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"prov": [{"bbox": [52.96451187133789, 575.38525390625, 385.93450927734375, 584.3953857421875], "page": 8, "span": [0, 79], "__ref_s3_data": null}], "text": "b. Structure predicted by TableFormer, with superimposed matched PDF cell text:", "type": "paragraph", "name": "List-item", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/6"}, {"prov": [{"bbox": [380.35894775390625, 492.63519287109375, 549.4217529296875, 500.1993713378906], "page": 8, "span": [0, 53], "__ref_s3_data": null}], "text": "Text is aligned to match original for ease of viewing", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/7"}, {"prov": [{"bbox": [49.346073150634766, 426.3501281738281, 545.1942138671875, 472.07501220703125], "page": 8, "span": [0, 397], "__ref_s3_data": null}], "text": "Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). 
Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"prov": [{"bbox": [62.11488342285156, 324.1943359375, 532.6304931640625, 333.92657470703125], "page": 8, "span": [0, 112], "__ref_s3_data": null}], "text": "Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"prov": [{"bbox": [49.59223556518555, 290.7525939941406, 163.79299926757812, 301.4790344238281], "page": 8, "span": [0, 25], "__ref_s3_data": null}], "text": "5.5. Qualitative Analysis", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.32524490356445, 77.88054656982422, 286.57904052734375, 256.4608459472656], "page": 8, "span": [0, 866], "__ref_s3_data": null}], "text": "We showcase several visualizations for the different components of our network on various \"complex\" tables within datasets presented in this work in Fig. 5 and Fig. 6 As it is shown, our model is able to predict bounding boxes for all table cells, even for the empty ones. Additionally, our post-processing techniques can extract the cell content by matching the predicted bounding boxes to the PDF cells based on their overlap and spatial proximity. The left part of Fig. 5 demonstrates also the adaptability of our method to any language, as it can successfully extract Japanese text, although the training set contains only English content. We provide more visualizations including the intermediate steps in the supplementary material. 
Overall these illustrations justify the versatility of our method across a diverse range of table appearances and content type.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.3092041015625, 290.5433654785156, 460.8484802246094, 302.1797180175781], "page": 8, "span": [0, 27], "__ref_s3_data": null}], "text": "6. Future Work & Conclusion", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [307.91571044921875, 138.69407653808594, 545.3740234375, 280.33050537109375], "page": 8, "span": [0, 640], "__ref_s3_data": null}], "text": "In this paper, we presented TableFormer an end-to-end transformer based approach to predict table structures and bounding boxes of cells from an image. This approach enables us to recreate the table structure, and extract the cell content from PDF or OCR by using bounding boxes. Additionally, it provides the versatility required in real-world scenarios when dealing with various types of PDF documents, and languages. Furthermore, our method outperforms all state-of-the-arts with a wide margin. Finally, we introduce \"SynthTabNet\" a challenging synthetically generated dataset that reinforces missing characteristics from other datasets.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.3152770996094, 109.15335845947266, 364.5574035644531, 120.16545104980469], "page": 8, "span": [0, 10], "__ref_s3_data": null}], "text": "References", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [313.0798034667969, 78.43290710449219, 545.2075805664062, 98.85089874267578], "page": 8, "span": [0, 121], "__ref_s3_data": null}], "text": "[1] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 
End-to-", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [294.7445983886719, 48.9600715637207, 300.3660583496094, 58.06891632080078], "page": 8, "span": [0, 1], "__ref_s3_data": null}], "text": "8", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [68.82711791992188, 674.5938110351562, 286.4485778808594, 716.9931640625], "page": 9, "span": [0, 212], "__ref_s3_data": null}], "text": "end object detection with transformers. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision - ECCV 2020 , pages 213-229, Cham, 2020. Springer International Publishing. 5", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.31157302856445, 641.926513671875, 286.36334228515625, 672.84130859375], "page": 9, "span": [0, 165], "__ref_s3_data": null}], "text": "[2] Zewen Chi, Heyan Huang, Heng-Da Xu, Houjin Yu, Wanxuan Yin, and Xian-Ling Mao. Complicated table structure recognition. arXiv preprint arXiv:1908.04729 , 2019. 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.34408187866211, 608.8453369140625, 286.4925231933594, 639.6375732421875], "page": 9, "span": [0, 125], "__ref_s3_data": null}], "text": "[3] Bertrand Couasnon and Aurelie Lemaitre. Recognition of Tables and Forms , pages 647-677. Springer London, London, 2014. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.48258972167969, 564.0968017578125, 286.4743957519531, 606.2783203125], "page": 9, "span": [0, 216], "__ref_s3_data": null}], "text": "[4] Herv'e D'ejean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), Apr. 2019. http://sac.founderit.com/. 
2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.2690315246582, 520.0095825195312, 286.5681457519531, 562.3704833984375], "page": 9, "span": [0, 236], "__ref_s3_data": null}], "text": "[5] Basilios Gatos, Dimitrios Danatsas, Ioannis Pratikakis, and Stavros J Perantonis. Automatic table detection in document images. In International Conference on Pattern Recognition and Image Analysis , pages 609-618. Springer, 2005. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.13688659667969, 476.3995056152344, 286.6510009765625, 518.47216796875], "page": 9, "span": [0, 193], "__ref_s3_data": null}], "text": "[6] Max Gobel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.40808868408203, 443.2104797363281, 286.702880859375, 474.14404296875], "page": 9, "span": [0, 165], "__ref_s3_data": null}], "text": "[7] EA Green and M Krishnamoorthy. Recognition of tables using table grammars. procs. In Symposium on Document Analysis and Recognition (SDAIR'95) , pages 261-277. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.161720275878906, 387.5791320800781, 286.3633117675781, 440.8350524902344], "page": 9, "span": [0, 273], "__ref_s3_data": null}], "text": "[8] Khurram Azeem Hashmi, Alain Pagani, Marcus Liwicki, Didier Stricker, and Muhammad Zeshan Afzal. Castabdetectors: Cascade network for table detection in document images with recursive feature pyramid and switchable atrous convolution. Journal of Imaging , 7(10), 2021. 
1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [54.05575180053711, 354.5013732910156, 286.4024963378906, 385.7289123535156], "page": 9, "span": [0, 170], "__ref_s3_data": null}], "text": "[9] Kaiming He, Georgia Gkioxari, Piotr Dollar, and Ross Girshick. Mask r-cnn. In Proceedings of the IEEE International Conference on Computer Vision (ICCV) , Oct 2017. 1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.697265625, 310.1059875488281, 286.36334228515625, 352.1959533691406], "page": 9, "span": [0, 226], "__ref_s3_data": null}], "text": "[10] Yelin He, X. Qi, Jiaquan Ye, Peng Gao, Yihao Chen, Bingcong Li, Xin Tang, and Rong Xiao. Pingan-vcgroup's solution for icdar 2021 competition on scientific table image recognition to latex. ArXiv , abs/2105.01846, 2021. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.60444641113281, 255.65762329101562, 286.5947265625, 308.168701171875], "page": 9, "span": [0, 239], "__ref_s3_data": null}], "text": "[11] Jianying Hu, Ramanujan S Kashi, Daniel P Lopresti, and Gordon Wilfong. Medium-independent table detection. In Document Recognition and Retrieval VII , volume 3967, pages 291-302. International Society for Optics and Photonics, 1999. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.61537170410156, 200.55062866210938, 287.1019287109375, 253.0636444091797], "page": 9, "span": [0, 240], "__ref_s3_data": null}], "text": "[12] Matthew Hurst. A constraint-based approach to table structure derivation. In Proceedings of the Seventh International Conference on Document Analysis and Recognition - Volume 2 , ICDAR '03, page 911, USA, 2003. IEEE Computer Society. 
2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.62745666503906, 144.75555419921875, 286.6357421875, 197.969482421875], "page": 9, "span": [0, 283], "__ref_s3_data": null}], "text": "[13] Thotreingam Kasar, Philippine Barlas, Sebastien Adam, Cl'ement Chatelain, and Thierry Paquet. Learning to detect tables in scanned document images using line information. In 2013 12th International Conference on Document Analysis and Recognition , pages 1185-1189. IEEE, 2013. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.66755294799805, 111.65099334716797, 286.39990234375, 142.97256469726562], "page": 9, "span": [0, 142], "__ref_s3_data": null}], "text": "[14] Pratik Kayal, Mrinal Anand, Harsh Desai, and Mayank Singh. Icdar 2021 competition on scientific table image recognition to latex, 2021. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.800445556640625, 79.06361389160156, 286.36163330078125, 109.40152740478516], "page": 9, "span": [0, 127], "__ref_s3_data": null}], "text": "[15] Harold W Kuhn. The hungarian method for the assignment problem. Naval research logistics quarterly , 2(1-2):83-97, 1955. 6", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [294.4941101074219, 48.96084976196289, 300.202392578125, 58.268619537353516], "page": 9, "span": [0, 1], "__ref_s3_data": null}], "text": "9", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [308.5410461425781, 653.306640625, 545.3473510742188, 717.0559692382812], "page": 9, "span": [0, 287], "__ref_s3_data": null}], "text": "[16] Girish Kulkarni, Visruth Premraj, Vicente Ordonez, Sagnik Dhar, Siming Li, Yejin Choi, Alexander C. Berg, and Tamara L. Berg. Babytalk: Understanding and generating simple image descriptions. IEEE Transactions on Pattern Analysis and Machine Intelligence , 35(12):2891-2903, 2013. 
4", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.45538330078125, 619.676025390625, 545.1134033203125, 650.8286743164062], "page": 9, "span": [0, 156], "__ref_s3_data": null}], "text": "[17] Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou, and Zhoujun Li. Tablebank: A benchmark dataset for table detection and recognition, 2019. 2, 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.3742370605469, 531.7857666015625, 545.3403930664062, 617.9396362304688], "page": 9, "span": [0, 407], "__ref_s3_data": null}], "text": "[18] Yiren Li, Zheng Huang, Junchi Yan, Yi Zhou, Fan Ye, and Xianhui Liu. Gfte: Graph-based financial table extraction. In Alberto Del Bimbo, Rita Cucchiara, Stan Sclaroff, Giovanni Maria Farinella, Tao Mei, Marco Bertini, Hugo Jair Escalante, and Roberto Vezzani, editors, Pattern Recognition. ICPR International Workshops and Challenges , pages 644-658, Cham, 2021. Springer International Publishing. 2, 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.3236083984375, 465.19134521484375, 545.2802734375, 529.0132446289062], "page": 9, "span": [0, 328], "__ref_s3_data": null}], "text": "[19] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter Staar. Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence , 35(17):15137-15145, May 2021. 1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.53900146484375, 420.8304138183594, 545.4502563476562, 463.07977294921875], "page": 9, "span": [0, 229], "__ref_s3_data": null}], "text": "[20] Rujiao Long, Wen Wang, Nan Xue, Feiyu Gao, Zhibo Yang, Yongpan Wang, and Gui-Song Xia. Parsing table structures in the wild. In Proceedings of the IEEE/CVF International Conference on Computer Vision , pages 944-952, 2021. 
2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.5403747558594, 354.6231689453125, 545.1134643554688, 419.1334533691406], "page": 9, "span": [0, 315], "__ref_s3_data": null}], "text": "[21] Shubham Singh Paliwal, D Vishwanath, Rohit Rahul, Monika Sharma, and Lovekesh Vig. Tablenet: Deep learning model for end-to-end table detection and tabular data extraction from scanned document images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 128-133. IEEE, 2019. 1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.5554504394531, 233.79345703125, 545.36865234375, 352.73980712890625], "page": 9, "span": [0, 592], "__ref_s3_data": null}], "text": "[22] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. Pytorch: An imperative style, high-performance deep learning library. In H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch'e-Buc, E. Fox, and R. Garnett, editors, Advances in Neural Information Processing Systems 32 , pages 8024-8035. Curran Associates, Inc., 2019. 6", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.3363952636719, 166.97824096679688, 545.676025390625, 231.79393005371094], "page": 9, "span": [0, 322], "__ref_s3_data": null}], "text": "[23] Devashish Prasad, Ayan Gadpal, Kshitij Kapadni, Manish Visave, and Kavita Sultanpure. Cascadetabnet: An approach for end to end table detection and structure recognition from image-based documents. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops , pages 572-573, 2020. 
1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.458740234375, 123.00692749023438, 545.4988403320312, 165.11456298828125], "page": 9, "span": [0, 224], "__ref_s3_data": null}], "text": "[24] Shah Rukh Qasim, Hassan Mahmood, and Faisal Shafait. Rethinking table recognition using graph neural networks. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 142-147. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.3419189453125, 79.0348892211914, 545.3016357421875, 121.25286865234375], "page": 9, "span": [0, 229], "__ref_s3_data": null}], "text": "[25] Hamid Rezatofighi, Nathan Tsoi, JunYoung Gwak, Amir Sadeghian, Ian Reid, and Silvio Savarese. Generalized intersection over union: A metric and a loss for bounding box regression. In Proceedings of the IEEE/CVF Conference on", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [69.3697280883789, 697.1412353515625, 286.36175537109375, 716.9249267578125], "page": 10, "span": [0, 64], "__ref_s3_data": null}], "text": "Computer Vision and Pattern Recognition , pages 658-666, 2019. 6", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.832733154296875, 631.0233154296875, 286.4916076660156, 694.9277954101562], "page": 10, "span": [0, 302], "__ref_s3_data": null}], "text": "[26] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 11621167, 2017. 
1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.52198791503906, 564.773193359375, 286.3633728027344, 628.47607421875], "page": 10, "span": [0, 308], "__ref_s3_data": null}], "text": "[27] Sebastian Schreiber, Stefan Agne, Ivo Wolf, Andreas Dengel, and Sheraz Ahmed. Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In 2017 14th IAPR international conference on document analysis and recognition (ICDAR) , volume 1, pages 1162-1167. IEEE, 2017. 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.72427749633789, 520.7044677734375, 286.36578369140625, 562.277099609375], "page": 10, "span": [0, 183], "__ref_s3_data": null}], "text": "[28] Faisal Shafait and Ray Smith. Table detection in heterogeneous documents. In Proceedings of the 9th IAPR International Workshop on Document Analysis Systems , pages 6572, 2010. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.75909423828125, 465.14605712890625, 286.9628601074219, 518.4593505859375], "page": 10, "span": [0, 275], "__ref_s3_data": null}], "text": "[29] Shoaib Ahmed Siddiqui, Imran Ali Fateh, Syed Tahseen Raza Rizvi, Andreas Dengel, and Sheraz Ahmed. Deeptabstr: Deep learning based table structure recognition. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1403-1409. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.69480895996094, 410.3855285644531, 286.36334228515625, 463.3351135253906], "page": 10, "span": [0, 251], "__ref_s3_data": null}], "text": "[30] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD , KDD '18, pages 774-782, New York, NY, USA, 2018. ACM. 
1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.900028228759766, 333.3085632324219, 286.3638916015625, 407.70587158203125], "page": 10, "span": [0, 366], "__ref_s3_data": null}], "text": "[31] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett, editors, Advances in Neural Information Processing Systems 30 , pages 5998-6008. Curran Associates, Inc., 2017. 5", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.75345993041992, 289.1075744628906, 286.42437744140625, 331.030029296875], "page": 10, "span": [0, 221], "__ref_s3_data": null}], "text": "[32] Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan. Show and tell: A neural image caption generator. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) , June 2015. 2", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.72990417480469, 244.6327667236328, 286.7159118652344, 286.82855224609375], "page": 10, "span": [0, 217], "__ref_s3_data": null}], "text": "[33] Wenyuan Xue, Qingyong Li, and Dacheng Tao. Res2tim: reconstruct syntactic structures from table images. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 749-755. IEEE, 2019. 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.77635192871094, 200.70655822753906, 286.65618896484375, 242.16712951660156], "page": 10, "span": [0, 190], "__ref_s3_data": null}], "text": "[34] Wenyuan Xue, Baosheng Yu, Wen Wang, Dacheng Tao, and Qingyong Li. Tgrnet: A table graph reconstruction network for table structure recognition. arXiv preprint arXiv:2106.10598 , 2021. 
3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.754791259765625, 156.28453063964844, 286.94097900390625, 197.91392517089844], "page": 10, "span": [0, 220], "__ref_s3_data": null}], "text": "[35] Quanzeng You, Hailin Jin, Zhaowen Wang, Chen Fang, and Jiebo Luo. Image captioning with semantic attention. In Proceedings of the IEEE conference on computer vision and pattern recognition , pages 4651-4659, 2016. 4", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.780601501464844, 101.19727325439453, 286.3633728027344, 154.27796936035156], "page": 10, "span": [0, 280], "__ref_s3_data": null}], "text": "[36] Xinyi Zheng, Doug Burdick, Lucian Popa, Peter Zhong, and Nancy Xin Ru Wang. Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. Winter Conference for Applications in Computer Vision (WACV) , 2021. 2, 3", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.777278900146484, 78.89190673828125, 286.36334228515625, 98.63412475585938], "page": 10, "span": [0, 106], "__ref_s3_data": null}], "text": "[37] Xu Zhong, Elaheh ShafieiBavani, and Antonio Jimeno Yepes. Image-based table recognition: Data, model,", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [292.6300048828125, 48.960445404052734, 302.6481628417969, 58.501861572265625], "page": 10, "span": [0, 2], "__ref_s3_data": null}], "text": "10", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [326.9217224121094, 674.8087158203125, 545.315673828125, 716.8610229492188], "page": 10, "span": [0, 192], "__ref_s3_data": null}], "text": "and evaluation. In Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm, editors, Computer Vision ECCV 2020 , pages 564-580, Cham, 2020. Springer International Publishing. 
2, 3, 7", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.47698974609375, 629.8935546875, 545.8077392578125, 672.1726684570312], "page": 10, "span": [0, 221], "__ref_s3_data": null}], "text": "[38] Xu Zhong, Jianbin Tang, and Antonio Jimeno Yepes. Publaynet: Largest dataset ever for document layout analysis. In 2019 International Conference on Document Analysis and Recognition (ICDAR) , pages 1015-1022, 2019. 1", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [131.88760375976562, 669.9490966796875, 465.37677001953125, 682.1016235351562], "page": 11, "span": [0, 60], "__ref_s3_data": null}], "text": "TableFormer: Table Structure Understanding with Transformers", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [219.5913848876953, 656.1969604492188, 375.0426940917969, 669.7401733398438], "page": 11, "span": [0, 22], "__ref_s3_data": null}], "text": "Supplementary Material", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [50.08771514892578, 620.0913696289062, 175.96437072753906, 631.563232421875], "page": 11, "span": [0, 26], "__ref_s3_data": null}], "text": "1. Details on the datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.920230865478516, 600.6398315429688, 150.364013671875, 611.6488037109375], "page": 11, "span": [0, 21], "__ref_s3_data": null}], "text": "1.1. 
Data preparation", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.31182098388672, 403.1282653808594, 286.73223876953125, 592.9334716796875], "page": 11, "span": [0, 931], "__ref_s3_data": null}], "text": "As a first step of our data preparation process, we have calculated statistics over the datasets across the following dimensions: (1) table size measured in the number of rows and columns, (2) complexity of the table, (3) strictness of the provided HTML structure and (4) completeness (i.e. no omitted bounding boxes). A table is considered to be simple if it does not contain row spans or column spans. Additionally, a table has a strict HTML structure if every row has the same number of columns after taking into account any row or column spans. Therefore a strict HTML structure looks always rectangular. However, HTML is a lenient encoding format, i.e. tables with rows of different sizes might still be regarded as correct due to implicit display rules. These implicit rules leave room for ambiguity, which we want to avoid. As such, we prefer to have \"strict\" tables, i.e. tables where every row has exactly the same length.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.21672058105469, 163.6554412841797, 286.7598571777344, 401.6038818359375], "page": 11, "span": [0, 1149], "__ref_s3_data": null}], "text": "We have developed a technique that tries to derive a missing bounding box out of its neighbors. As a first step, we use the annotation data to generate the most fine-grained grid that covers the table structure. In case of strict HTML tables, all grid squares are associated with some table cell and in the presence of table spans a cell extends across multiple grid squares. When enough bounding boxes are known for a rectangular table, it is possible to compute the geometrical border lines between the grid rows and columns. 
Eventually this information is used to generate the missing bounding boxes. Additionally, the existence of unused grid squares indicates that the table rows have unequal number of columns and the overall structure is non-strict. The generation of missing bounding boxes for non-strict HTML tables is ambiguous and therefore quite challenging. Thus, we have decided to simply discard those tables. In case of PubTabNet we have computed missing bounding boxes for 48% of the simple and 69% of the complex tables. Regarding FinTabNet, 68% of the simple and 98% of the complex tables require the generation of bounding boxes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.46216583251953, 140.3820037841797, 286.3649597167969, 162.59068298339844], "page": 11, "span": [0, 92], "__ref_s3_data": null}], "text": "Figure 7 illustrates the distribution of the tables across different dimensions per dataset.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [50.11198425292969, 119.7578125, 153.92388916015625, 130.09898376464844], "page": 11, "span": [0, 23], "__ref_s3_data": null}], "text": "1.2. Synthetic datasets", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.3624153137207, 77.53544616699219, 286.8395080566406, 111.57047271728516], "page": 11, "span": [0, 167], "__ref_s3_data": null}], "text": "Aiming to train and evaluate our models in a broader spectrum of table data we have synthesized four types of datasets. Each one contains tables with different appear-", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9020690917969, 584.572265625, 545.1925659179688, 630.37939453125], "page": 11, "span": [0, 221], "__ref_s3_data": null}], "text": "ances in regard to their size, structure, style and content. Every synthetic dataset contains 150k examples, summing up to 600k synthetic examples. 
All datasets are divided into Train, Test and Val splits (80%, 10%, 10%).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.86920166015625, 559.6962890625, 545.1150512695312, 581.7109375], "page": 11, "span": [0, 89], "__ref_s3_data": null}], "text": "The process of generating a synthetic dataset can be decomposed into the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [307.9952697753906, 475.45721435546875, 545.2145385742188, 557.0147705078125], "page": 11, "span": [0, 373], "__ref_s3_data": null}], "text": "1. Prepare styling and content templates: The styling templates have been manually designed and organized into groups of scope specific appearances (e.g. financial data, marketing data, etc.) Additionally, we have prepared curated collections of content templates by extracting the most frequently used terms out of non-synthetic datasets (e.g. PubTabNet, FinTabNet, etc.).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.81939697265625, 342.9166259765625, 545.316650390625, 472.3519592285156], "page": 11, "span": [0, 573], "__ref_s3_data": null}], "text": "2. Generate table structures: The structure of each synthetic dataset assumes a horizontal table header which potentially spans over multiple rows and a table body that may contain a combination of row spans and column spans. However, spans are not allowed to cross the header - body boundary. The table structure is described by the parameters: Total number of table rows and columns, number of header rows, type of spans (header only spans, row only spans, column only spans, both row and column spans), maximum span size and the ratio of the table area covered by spans.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.7553405761719, 294.5247802734375, 545.6655883789062, 340.7125244140625], "page": 11, "span": [0, 195], "__ref_s3_data": null}], "text": "3. 
Generate content: Based on the dataset theme , a set of suitable content templates is chosen first. Then, this content can be combined with purely random text to produce the synthetic content.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.8453674316406, 245.37010192871094, 545.1974487304688, 291.67205810546875], "page": 11, "span": [0, 218], "__ref_s3_data": null}], "text": "4. Apply styling templates: Depending on the domain of the synthetic dataset, a set of styling templates is first manually selected. Then, a style is randomly selected to format the appearance of the synthesized table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.942626953125, 184.91055297851562, 545.261962890625, 243.14846801757812], "page": 11, "span": [0, 238], "__ref_s3_data": null}], "text": "5. Render the complete tables: The synthetic table is finally rendered by a web browser engine to generate the bounding boxes for each table cell. A batching technique is utilized to optimize the runtime overhead of the rendering process.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.89263916015625, 145.01368713378906, 545.1087646484375, 170.1573028564453], "page": 11, "span": [0, 47], "__ref_s3_data": null}], "text": "2. Prediction post-processing for PDF documents", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [308.06634521484375, 77.58016204833984, 545.1151733398438, 135.6042938232422], "page": 11, "span": [0, 247], "__ref_s3_data": null}], "text": "Although TableFormer can predict the table structure and the bounding boxes for tables recognized inside PDF documents, this is not enough when a full reconstruction of the original table is required. 
This happens mainly due the following reasons:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [292.63104248046875, 48.96039962768555, 302.5936279296875, 58.58494567871094], "page": 11, "span": [0, 2], "__ref_s3_data": null}], "text": "11", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [49.27131271362305, 605.3292236328125, 545.1137084960938, 627.6530151367188], "page": 12, "span": [0, 245], "__ref_s3_data": null}], "text": "Figure 7: Distribution of the tables across different dimensions per dataset. Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"prov": [{"bbox": [61.34596252441406, 560.20703125, 286.3651123046875, 581.5670166015625], "page": 12, "span": [0, 61], "__ref_s3_data": null}], "text": "\u00b7 TableFormer output does not include the table cell content.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [61.074283599853516, 526.74169921875, 286.97015380859375, 548.9605102539062], "page": 12, "span": [0, 77], "__ref_s3_data": null}], "text": "\u00b7 There are occasional inaccuracies in the predictions of the bounding boxes.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.33855056762695, 396.2931213378906, 286.4153747558594, 513.664306640625], "page": 12, "span": [0, 545], "__ref_s3_data": null}], "text": "However, it is possible to mitigate those limitations by combining the TableFormer predictions with the information already present inside a programmatic PDF document. More specifically, PDF documents can be seen as a sequence of PDF cells where each cell is described by its content and bounding box. 
If we are able to associate the PDF cells with the predicted table cells, we can directly link the PDF cell content to the table cell structure and use the PDF bounding boxes to correct misalignments in the predicted table cell bounding boxes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.47560119628906, 371.6419677734375, 286.3649597167969, 393.8466491699219], "page": 12, "span": [0, 68], "__ref_s3_data": null}], "text": "Here is a step-by-step description of the prediction postprocessing:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.45584487915039, 335.4472351074219, 286.3802185058594, 369.84210205078125], "page": 12, "span": [0, 173], "__ref_s3_data": null}], "text": "1. Get the minimal grid dimensions - number of rows and columns for the predicted table structure. This represents the most granular grid for the underlying table structure.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.4388313293457, 287.7532043457031, 286.4390869140625, 333.490234375], "page": 12, "span": [0, 187], "__ref_s3_data": null}], "text": "2. Generate pair-wise matches between the bounding boxes of the PDF cells and the predicted cells. The Intersection Over Union (IOU) metric is used to evaluate the quality of the matches.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.394752502441406, 263.5272216796875, 286.36492919921875, 285.01922607421875], "page": 12, "span": [0, 97], "__ref_s3_data": null}], "text": "3. Use a carefully selected IOU threshold to designate the matches as \"good\" ones and \"bad\" ones.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.28644943237305, 227.34722900390625, 286.3651123046875, 261.0640563964844], "page": 12, "span": [0, 131], "__ref_s3_data": null}], "text": "3.a. 
If all IOU scores in a column are below the threshold, discard all predictions (structure and bounding boxes) for that column.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.466670989990234, 190.80416870117188, 286.50482177734375, 224.85504150390625], "page": 12, "span": [0, 169], "__ref_s3_data": null}], "text": "4. Find the best-fitting content alignment for the predicted cells with good IOU per each column. The alignment of the column can be identified by the following formula:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [110.48045349121094, 137.08892822265625, 286.3623962402344, 169.79971313476562], "page": 12, "span": [0, 81], "__ref_s3_data": null}], "text": "alignment = arg min c { D$_{c}$ } D$_{c}$ = max { x$_{c}$ } - min { x$_{c}$ } (4)", "type": "equation", "name": "Formula", "font": null}, {"prov": [{"bbox": [49.242549896240234, 103.07321166992188, 286.36199951171875, 125.2325210571289], "page": 12, "span": [0, 103], "__ref_s3_data": null}], "text": "where c is one of { left, centroid, right } and x$_{c}$ is the xcoordinate for the corresponding point.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [49.43268966674805, 78.84821319580078, 286.3649597167969, 100.3372802734375], "page": 12, "span": [0, 110], "__ref_s3_data": null}], "text": "5. Use the alignment computed in step 4, to compute the median x -coordinate for all table columns and the me-", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.05322265625, 536.2962036132812, 545.2343139648438, 581.705810546875], "page": 12, "span": [0, 183], "__ref_s3_data": null}], "text": "dian cell size for all table cells. 
The usage of median during the computations, helps to eliminate outliers caused by occasional column spans which are usually wider than the normal.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [308.280029296875, 512.0361938476562, 545.1903686523438, 533.602294921875], "page": 12, "span": [0, 91], "__ref_s3_data": null}], "text": "6. Snap all cells with bad IOU to their corresponding median x -coordinates and cell sizes.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.955810546875, 404.08929443359375, 545.283935546875, 509.8694152832031], "page": 12, "span": [0, 471], "__ref_s3_data": null}], "text": "7. Generate a new set of pair-wise matches between the corrected bounding boxes and PDF cells. This time use a modified version of the IOU metric, where the area of the intersection between the predicted and PDF cells is divided by the PDF cell area. In case there are multiple matches for the same PDF cell, the prediction with the higher score is preferred. This covers the cases where the PDF cells are smaller than the area of predicted or corrected prediction cells.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.9190368652344, 332.00836181640625, 545.3485717773438, 401.61907958984375], "page": 12, "span": [0, 311], "__ref_s3_data": null}], "text": "8. In some rare occasions, we have noticed that TableFormer can confuse a single column as two. When the postprocessing steps are applied, this results with two predicted columns pointing to the same PDF column. In such case we must de-duplicate the columns according to highest total column intersection score.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.7784423828125, 223.864013671875, 545.276611328125, 329.9189758300781], "page": 12, "span": [0, 503], "__ref_s3_data": null}], "text": "9. Pick up the remaining orphan cells. 
There could be cases, when after applying all the previous post-processing steps, some PDF cells could still remain without any match to predicted cells. However, it is still possible to deduce the correct matching for an orphan PDF cell by mapping its bounding box on the geometry of the grid. This mapping decides if the content of the orphan cell will be appended to an already matched table cell, or a new table cell should be created to match with the orphan.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [307.93115234375, 187.8454132080078, 545.3099365234375, 221.8761444091797], "page": 12, "span": [0, 113], "__ref_s3_data": null}], "text": "9a. Compute the top and bottom boundary of the horizontal band for each grid row (min/max y coordinates per row).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.345458984375, 163.541015625, 545.1150512695312, 185.8941192626953], "page": 12, "span": [0, 101], "__ref_s3_data": null}], "text": "9b. Intersect the orphan's bounding box with the row bands, and map the cell to the closest grid row.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.0950622558594, 127.3694076538086, 545.1150512695312, 161.0072479248047], "page": 12, "span": [0, 117], "__ref_s3_data": null}], "text": "9c. Compute the left and right boundary of the vertical band for each grid column (min/max x coordinates per column).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.24517822265625, 102.65910339355469, 545.114990234375, 124.94414520263672], "page": 12, "span": [0, 107], "__ref_s3_data": null}], "text": "9d. 
Intersect the orphan's bounding box with the column bands, and map the cell to the closest grid column.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [308.1419677734375, 78.71697998046875, 545.1151733398438, 100.19052124023438], "page": 12, "span": [0, 118], "__ref_s3_data": null}], "text": "9e. If the table cell under the identified row and column is not empty, extend its content with the content of the or-", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [292.6310729980469, 48.96040725708008, 302.5936584472656, 58.750736236572266], "page": 12, "span": [0, 2], "__ref_s3_data": null}], "text": "12", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [49.6470832824707, 707.8850708007812, 88.84658813476562, 717.4002685546875], "page": 13, "span": [0, 10], "__ref_s3_data": null}], "text": "phan cell.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [49.755611419677734, 683.8928833007812, 286.63763427734375, 705.5321044921875], "page": 13, "span": [0, 76], "__ref_s3_data": null}], "text": "9f. 
Otherwise create a new structural cell and match it wit the orphan cell.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [49.36106491088867, 659.6165161132812, 286.364990234375, 681.5235595703125], "page": 13, "span": [0, 97], "__ref_s3_data": null}], "text": "Aditional images with examples of TableFormer predictions and post-processing can be found below.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [62.86752700805664, 281.0370788574219, 273.1334228515625, 290.6253662109375], "page": 13, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 8: Example of a table with multi-line header.", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [292.6309814453125, 48.960079193115234, 302.59356689453125, 58.455711364746094], "page": 13, "span": [0, 2], "__ref_s3_data": null}], "text": "13", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [308.1914367675781, 464.54010009765625, 545.1151123046875, 486.05615234375], "page": 13, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 9: Example of a table with big empty distance between cells.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"prov": [{"bbox": [311.8465576171875, 102.13106536865234, 541.63232421875, 112.08642578125], "page": 13, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 10: Example of a complex table with empty cells.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"prov": [{"bbox": [49.5418815612793, 414.36810302734375, 286.3650817871094, 436.19610595703125], "page": 14, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 11: Simple table with different style and empty cells.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", 
"$ref": "#/figures/13"}, {"prov": [{"bbox": [54.174434661865234, 110.72535705566406, 281.85589599609375, 120.68132781982422], "page": 14, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 12: Simple table predictions and post processing.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"prov": [{"bbox": [292.6309814453125, 48.96007537841797, 302.59356689453125, 58.46809768676758], "page": 14, "span": [0, 2], "__ref_s3_data": null}], "text": "14", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [315.2740478515625, 410.7093200683594, 538.1852416992188, 420.72198486328125], "page": 14, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 13: Table predictions example on colorful table.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [344.60076904296875, 99.54707336425781, 508.9893493652344, 109.21481323242188], "page": 14, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 14: Example with multi-line text.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"prov": [{"bbox": [308.0424499511719, 118.20308685302734, 545.193115234375, 139.58673095703125], "page": 15, "span": [0, 106], "__ref_s3_data": null}], "text": "Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"prov": [{"bbox": [83.66278839111328, 138.1688690185547, 252.24224853515625, 147.89190673828125], "page": 15, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 15: Example with triangular table.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"prov": [{"bbox": [292.6309814453125, 
48.9600944519043, 302.59356689453125, 58.5789794921875], "page": 15, "span": [0, 2], "__ref_s3_data": null}], "text": "15", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [49.661865234375, 262.3688049316406, 545.1138305664062, 284.1699523925781], "page": 16, "span": [0, 153], "__ref_s3_data": null}], "text": "Figure 17: Example of long table. End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"prov": [{"bbox": [292.6309814453125, 48.960079193115234, 302.5961608886719, 58.51115036010742], "page": 16, "span": [0, 2], "__ref_s3_data": null}], "text": "16", "type": "page-footer", "name": "Page-footer", "font": null}], "figures": [{"prov": [{"bbox": [314.3843994140625, 382.2417297363281, 539.5308837890625, 453.7343444824219], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [311.3420715332031, 540.9691162109375, 550.2800903320312, 713.871826171875], "page": 3, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 2: Distribution of the tables across different table dimensions in PubTabNet + FinTabNet datasets", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [74.15921783447266, 607.8399658203125, 520.050537109375, 713.440185546875], "page": 5, "span": [0, 212], "__ref_s3_data": null}], "text": "Figure 3: TableFormer takes in an image of the PDF and creates bounding box and HTML structure predictions that are synchronized. 
The bounding boxes grabs the content from the PDF and inserts it in the structure.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [53.81645202636719, 284.83001708984375, 284.68927001953125, 533.7860717773438], "page": 5, "span": [0, 745], "__ref_s3_data": null}], "text": "Figure 4: Given an input image of a table, the Encoder produces fixed-length features that represent the input image. The features are then passed to both the Structure Decoder and Cell BBox Decoder . During training, the Structure Decoder receives 'tokenized tags' of the HTML code that represent the table structure. Afterwards, a transformer encoder and decoder architecture is employed to produce features that are received by a linear layer, and the Cell BBox Decoder. The linear layer is applied to the features to predict the tags. Simultaneously, the Cell BBox Decoder selects features referring to the data cells (' < td > ', ' < ') and passes them through an attention network, an MLP, and a linear layer to predict the bounding boxes.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [50.235816955566406, 604.4113159179688, 302.18707275390625, 687.9998168945312], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [304.36199951171875, 611.2498168945312, 555.037109375, 690.0220947265625], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [51.7950439453125, 348.8772888183594, 211.3548126220703, 411.7484436035156], "page": 8, "span": [0, 397], "__ref_s3_data": null}], "text": "Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). 
Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [382.25286865234375, 349.6600341796875, 542.1312255859375, 410.2227783203125], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [216.797119140625, 349.422607421875, 375.72662353515625, 411.4811706542969], "page": 8, "span": [0, 112], "__ref_s3_data": null}], "text": "Figure 6: An example of TableFormer predictions (bounding boxes and structure) from generated SynthTabNet table.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [52.74595642089844, 644.7669677734375, 544.16552734375, 717.1785888671875], "page": 12, "span": [0, 245], "__ref_s3_data": null}], "text": "Figure 7: Distribution of the tables across different dimensions per dataset. Simple vs complex tables per dataset and split, strict vs non strict html structures per dataset and table complexity, missing bboxes per dataset and table complexity.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [309.5037841796875, 497.8887634277344, 555.8611450195312, 696.404052734375], "page": 13, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 9: Example of a table with big empty distance between cells.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [334.1772155761719, 126.88340759277344, 518.6530151367188, 198.84474182128906], "page": 13, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 10: Example of a complex table with empty cells.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [52.312522888183594, 537.9481201171875, 167.34197998046875, 577.6830444335938], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [51.631805419921875, 448.1211242675781, 
283.3851013183594, 518.947265625], "page": 14, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 11: Simple table with different style and empty cells.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [50.32429885864258, 136.2794647216797, 177.11224365234375, 180.9558563232422], "page": 14, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 12: Simple table predictions and post processing.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [320.02325439453125, 199.1812744140625, 519.2925415039062, 244.82144165039062], "page": 14, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 14: Example with multi-line text.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [54.9669075012207, 543.454345703125, 279.7747802734375, 657.51416015625], "page": 15, "span": [0, 106], "__ref_s3_data": null}], "text": "Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [50.68792724609375, 160.98660278320312, 320.17071533203125, 287.21685791015625], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [353.733642578125, 156.9563751220703, 495.4932861328125, 306.0447692871094], "page": 15, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 15: Example with triangular table.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [66.34233856201172, 295.27490234375, 528.1603393554688, 537.7723999023438], "page": 16, "span": [0, 153], "__ref_s3_data": null}], "text": "Figure 17: Example of long table. 
End-to-end example from initial PDF cells to prediction of bounding boxes, post processing and prediction of structure.", "type": "figure", "bounding-box": null}], "tables": [{"prov": [{"bbox": [315.6885681152344, 489.5033874511719, 537.0928344726562, 561.0180053710938], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Tables organize valuable content in a concise and compact representation. This content is extremely valuable for systems such as search engines, Knowledge Graph's, etc, since they enhance their predictive capabilities. Unfortunately, tables come in a large variety of shapes and sizes. Furthermore, they can have complex column/row-header configurations, multiline rows, different variety of separation lines, missing entries, etc. As such, the correct identification of the table-structure from an image is a nontrivial task. In this paper, we present a new table-structure identification model. The latter improves the latest end-toend deep learning model (i.e. encoder-dual-decoder from PubTabNet) in two significant ways. First, we introduce a new object detection decoder for table-cells. In this way, we can obtain the content of the table-cells from programmatic PDF's directly from the PDF source and avoid the training of the custom OCR decoders. This architectural change leads to more accurate table-content extraction and allows us to tackle non-english tables. Second, we replace the LSTM decoders with transformer based decoders. 
This upgrade improves significantly the previous state-of-the-art tree-editing-distance-score (TEDS) from 91% to 98.5% on simple tables and from 88.7% to 95% on complex tables.", "type": "table", "#-cols": 3, "#-rows": 2, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [384.03289794921875, 529.1906127929688, 390.0376892089844, 539.321044921875], "spans": [[0, 1]], "text": "3", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [451.9457092285156, 546.5225219726562, 457.95050048828125, 556.6529541015625], "spans": [[0, 2]], "text": "1", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [331.1968078613281, 512.5169067382812, 337.20159912109375, 522.6473388671875], "spans": [[1, 0]], "text": "2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [315.6885681152344, 295.8706359863281, 536.98681640625, 357.77044677734375], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 1: Picture of a table with subtle, complex features such as (1) multi-column headers, (2) cell with multi-row text and (3) cells with no content. 
Image from PubTabNet evaluation set, filename: 'PMC2944238 004 02'.", "type": "table", "#-cols": 6, "#-rows": 5, "data": [[{"bbox": [318.8807067871094, 345.5291748046875, 323.273193359375, 354.3141174316406], "spans": [[0, 0]], "text": "0", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [347.24871826171875, 345.5291748046875, 351.6412048339844, 354.3141174316406], "spans": [[0, 1], [0, 2]], "text": "1", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [347.24871826171875, 345.5291748046875, 351.6412048339844, 354.3141174316406], "spans": [[0, 1], [0, 2]], "text": "1", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [394.1042175292969, 344.2760009765625, 465.8810119628906, 354.4064025878906], "spans": [[0, 3], [0, 4]], "text": "2 1", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [394.1042175292969, 344.2760009765625, 465.8810119628906, 354.4064025878906], "spans": [[0, 3], [0, 4]], "text": "2 1", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [318.7731628417969, 333.6695556640625, 323.1656494140625, 342.4544982910156], "spans": [[1, 0]], "text": "3", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [347.24871826171875, 333.6695556640625, 351.6412048339844, 342.4544982910156], "spans": [[1, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": 
false, "row-span": [1, 2]}, {"bbox": [366.7010192871094, 332.748779296875, 398.4967041015625, 342.8791809082031], "spans": [[1, 2]], "text": "5 3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [440.95941162109375, 333.6695556640625, 445.3518981933594, 342.4544982910156], "spans": [[1, 3]], "text": "6", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [487.8149108886719, 333.6695556640625, 492.2073974609375, 342.4544982910156], "spans": [[1, 4]], "text": "7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [318.7731628417969, 309.51080322265625, 323.1656494140625, 318.2957458496094], "spans": [[2, 0]], "text": "8", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [347.24871826171875, 321.3704528808594, 351.6412048339844, 330.1553955078125], "spans": [[2, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [394.1042175292969, 321.3704528808594, 402.8883056640625, 330.1553955078125], "spans": [[2, 2]], "text": "10", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [440.95941162109375, 321.3704528808594, 449.4228515625, 330.1553955078125], "spans": [[2, 3]], "text": "11", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [487.8149108886719, 321.3704528808594, 496.5989990234375, 330.1553955078125], "spans": [[2, 4]], "text": "12", "type": 
"body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 5]}], [{"bbox": null, "spans": [[3, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [347.24871826171875, 309.51080322265625, 356.0328063964844, 318.2957458496094], "spans": [[3, 1]], "text": "13", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.1042175292969, 309.51080322265625, 402.8883056640625, 318.2957458496094], "spans": [[3, 2]], "text": "14", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [440.95941162109375, 309.51080322265625, 449.7434997558594, 318.2957458496094], "spans": [[3, 3]], "text": "15", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [487.8149108886719, 309.51080322265625, 496.5989990234375, 318.2957458496094], "spans": [[3, 4]], "text": "16", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [2, 5]}], [{"bbox": null, "spans": [[4, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [347.24871826171875, 298.0903625488281, 356.0328063964844, 
306.87530517578125], "spans": [[4, 1]], "text": "17", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [394.1042175292969, 298.0903625488281, 402.8883056640625, 306.87530517578125], "spans": [[4, 2]], "text": "18", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [440.95941162109375, 298.0903625488281, 449.7434997558594, 306.87530517578125], "spans": [[4, 3]], "text": "19", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [487.8149108886719, 298.0903625488281, 496.5989990234375, 306.87530517578125], "spans": [[4, 4]], "text": "20", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [331.90423583984375, 308.54669189453125, 337.9090270996094, 318.6770935058594], "spans": [[2, 5], [3, 5], [4, 5]], "text": "2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [2, 5]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [309.9828796386719, 636.4157104492188, 542.3903198242188, 719.2901611328125], "page": 4, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1: Both \"Combined-Tabnet\" and \"CombinedTabnet\" are variations of the following: (*) The CombinedTabnet dataset is the processed combination of PubTabNet and Fintabnet. 
(**) The combined dataset is the processed combination of PubTabNet, Fintabnet and TableBank.", "type": "table", "#-cols": 5, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [412.3320007324219, 709.4790649414062, 430.9023132324219, 718.3856201171875], "spans": [[0, 1]], "text": "Tags", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [442.857421875, 709.4790649414062, 464.4463806152344, 718.3856201171875], "spans": [[0, 2]], "text": "Bbox", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [477.78631591796875, 709.4790649414062, 494.9419250488281, 718.3856201171875], "spans": [[0, 3]], "text": "Size", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [508.2818603515625, 709.4790649414062, 536.9143676757812, 718.3856201171875], "spans": [[0, 4]], "text": "Format", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [317.05999755859375, 697.1260986328125, 361.64263916015625, 706.0326538085938], "spans": [[1, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [417.8559875488281, 697.1161499023438, 425.37774658203125, 706.33154296875], "spans": [[1, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [449.89569091796875, 697.1161499023438, 457.4174499511719, 706.33154296875], "spans": [[1, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [476.4010009765625, 697.1260986328125, 496.3262023925781, 706.0326538085938], "spans": [[1, 3]], "text": "509k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [512.6349487304688, 697.1260986328125, 532.5601196289062, 706.0326538085938], "spans": [[1, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [317.05999755859375, 685.1710815429688, 359.4309387207031, 694.07763671875], "spans": [[2, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [417.8559875488281, 685.1611328125, 425.37774658203125, 694.3765258789062], "spans": [[2, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [449.89569091796875, 685.1611328125, 457.4174499511719, 694.3765258789062], "spans": [[2, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [476.4010009765625, 685.1710815429688, 496.3262023925781, 694.07763671875], "spans": [[2, 3]], "text": "112k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [513.4618530273438, 685.1710815429688, 531.7332763671875, 694.07763671875], "spans": [[2, 4]], "text": "PDF", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [317.05999755859375, 673.215087890625, 359.9788818359375, 682.1216430664062], "spans": [[3, 0]], "text": "TableBank", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[417.8559875488281, 673.2051391601562, 425.37774658203125, 682.4205322265625], "spans": [[3, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [450.812255859375, 673.2051391601562, 456.50091552734375, 682.4205322265625], "spans": [[3, 2]], "text": "7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [476.4010009765625, 673.215087890625, 496.3262023925781, 682.1216430664062], "spans": [[3, 3]], "text": "145k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [511.25018310546875, 673.215087890625, 533.9450073242188, 682.1216430664062], "spans": [[3, 4]], "text": "JPEG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [317.05999755859375, 661.2600708007812, 400.3772277832031, 670.1666259765625], "spans": [[4, 0]], "text": "Combined-Tabnet(*)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [417.8559875488281, 661.2501220703125, 425.37774658203125, 670.4655151367188], "spans": [[4, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [449.89569091796875, 661.2501220703125, 457.4174499511719, 670.4655151367188], "spans": [[4, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [476.4010009765625, 661.2600708007812, 496.3262023925781, 670.1666259765625], "spans": [[4, 3]], "text": "400k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [512.6349487304688, 661.2600708007812, 532.5601196289062, 
670.1666259765625], "spans": [[4, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [317.05999755859375, 649.3050537109375, 375.1718444824219, 658.2116088867188], "spans": [[5, 0]], "text": "Combined(**)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [417.8559875488281, 649.2951049804688, 425.37774658203125, 658.510498046875], "spans": [[5, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [449.89569091796875, 649.2951049804688, 457.4174499511719, 658.510498046875], "spans": [[5, 2]], "text": "3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [476.4010009765625, 649.3050537109375, 496.3262023925781, 658.2116088867188], "spans": [[5, 3]], "text": "500k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [512.6349487304688, 649.3050537109375, 532.5601196289062, 658.2116088867188], "spans": [[5, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [317.05999755859375, 637.3500366210938, 369.3935241699219, 646.256591796875], "spans": [[6, 0]], "text": "SynthTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [417.8559875488281, 637.3401489257812, 425.37774658203125, 646.5555419921875], "spans": [[6, 1]], "text": "3", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [449.89569091796875, 637.3401489257812, 457.4174499511719, 646.5555419921875], "spans": [[6, 2]], "text": 
"3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [476.4010009765625, 637.35009765625, 496.3262023925781, 646.2566528320312], "spans": [[6, 3]], "text": "600k", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [512.6349487304688, 637.35009765625, 532.5601196289062, 646.2566528320312], "spans": [[6, 4]], "text": "PNG", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [53.472068786621094, 210.4700927734375, 282.5771789550781, 382.98480224609375], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2: Structure results on PubTabNet (PTN), FinTabNet (FTN), TableBank (TB) and SynthTabNet (STN).", "type": "table", "#-cols": 5, "#-rows": 11, "data": [[{"bbox": [78.84300231933594, 362.403076171875, 104.8553466796875, 371.30963134765625], "spans": [[0, 0]], "text": "Model", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [129.33799743652344, 356.42608642578125, 159.21583557128906, 365.3326416015625], "spans": [[0, 1]], "text": "Dataset", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [171.17095947265625, 356.42608642578125, 199.40496826171875, 365.3326416015625], "spans": [[0, 2]], "text": "Simple", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [211.1999969482422, 356.42608642578125, 247.74349975585938, 377.2876281738281], "spans": [[0, 3]], "text": "TEDS Complex", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [264.5404357910156, 
356.42608642578125, 277.27264404296875, 365.3326416015625], "spans": [[0, 4]], "text": "All", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [81.61199951171875, 339.4690856933594, 102.08513641357422, 348.3756408691406], "spans": [[1, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [134.87205505371094, 339.4690856933594, 153.69140625, 348.3756408691406], "spans": [[1, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [176.56553649902344, 339.4690856933594, 194.00009155273438, 348.3756408691406], "spans": [[1, 2]], "text": "91.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [220.82937622070312, 339.4690856933594, 238.26393127441406, 348.3756408691406], "spans": [[1, 3]], "text": "88.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [262.18414306640625, 339.4690856933594, 279.6186828613281, 348.3756408691406], "spans": [[1, 4]], "text": "89.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [82.16500091552734, 327.5130920410156, 101.53230285644531, 336.4196472167969], "spans": [[2, 0]], "text": "GTE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [134.86715698242188, 327.5130920410156, 153.68650817871094, 336.4196472167969], "spans": [[2, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [183.62411499023438, 327.5130920410156, 186.94166564941406, 
336.4196472167969], "spans": [[2, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [227.88795471191406, 327.5130920410156, 231.20550537109375, 336.4196472167969], "spans": [[2, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [259.69854736328125, 327.5130920410156, 282.1144104003906, 336.4196472167969], "spans": [[2, 4]], "text": "93.01", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [66.31500244140625, 314.9600830078125, 117.38329315185547, 323.86663818359375], "spans": [[3, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [134.86766052246094, 314.9600830078125, 153.68701171875, 323.86663818359375], "spans": [[3, 1]], "text": "PTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [176.57110595703125, 314.9600830078125, 194.0056610107422, 323.86663818359375], "spans": [[3, 2]], "text": "98.5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [220.83494567871094, 314.9600830078125, 238.26950073242188, 323.86663818359375], "spans": [[3, 3]], "text": "95.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [259.697998046875, 315.0298156738281, 282.1138610839844, 323.9862060546875], "spans": [[3, 4]], "text": "96.75", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [81.61199951171875, 299.76708984375, 102.08513641357422, 308.67364501953125], "spans": [[4, 0]], "text": 
"EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [134.87205505371094, 299.76708984375, 153.69140625, 308.67364501953125], "spans": [[4, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [176.56553649902344, 299.76708984375, 194.00009155273438, 308.67364501953125], "spans": [[4, 2]], "text": "88.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [218.33871459960938, 299.76708984375, 240.7545623779297, 308.67364501953125], "spans": [[4, 3]], "text": "92.08", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [262.1841125488281, 299.76708984375, 279.61865234375, 308.67364501953125], "spans": [[4, 4]], "text": "90.6", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [82.16500091552734, 287.8121032714844, 101.53230285644531, 296.7186584472656], "spans": [[5, 0]], "text": "GTE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [134.86715698242188, 287.8121032714844, 153.68650817871094, 296.7186584472656], "spans": [[5, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [183.62411499023438, 287.8121032714844, 186.94166564941406, 296.7186584472656], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [227.88795471191406, 287.8121032714844, 231.20550537109375, 296.7186584472656], "spans": [[5, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": 
[3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [259.69854736328125, 287.8121032714844, 282.1144104003906, 296.7186584472656], "spans": [[5, 4]], "text": "87.14", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [71.78900146484375, 275.85711669921875, 111.90838623046875, 284.763671875], "spans": [[6, 0]], "text": "GTE (FT)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [134.86221313476562, 275.85711669921875, 153.6815643310547, 284.763671875], "spans": [[6, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [183.62913513183594, 275.85711669921875, 186.94668579101562, 284.763671875], "spans": [[6, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [227.89297485351562, 275.85711669921875, 231.2105255126953, 284.763671875], "spans": [[6, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [259.693603515625, 275.85711669921875, 282.1094665527344, 284.763671875], "spans": [[6, 4]], "text": "91.02", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [66.31500244140625, 263.9021301269531, 117.38329315185547, 272.8086853027344], "spans": [[7, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [134.86766052246094, 263.9021301269531, 153.68701171875, 272.8086853027344], "spans": [[7, 1]], "text": "FTN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, 
{"bbox": [176.57110595703125, 263.9021301269531, 194.0056610107422, 272.8086853027344], "spans": [[7, 2]], "text": "97.5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [220.83494567871094, 263.9021301269531, 238.26950073242188, 272.8086853027344], "spans": [[7, 3]], "text": "96.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [262.1889953613281, 263.97186279296875, 279.62353515625, 272.9282531738281], "spans": [[7, 4]], "text": "96.8", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [81.61199951171875, 246.59507751464844, 102.08513641357422, 255.5016326904297], "spans": [[8, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [137.91064453125, 246.59507751464844, 150.64285278320312, 255.5016326904297], "spans": [[8, 1]], "text": "TB", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [176.56553649902344, 246.59507751464844, 194.00009155273438, 255.5016326904297], "spans": [[8, 2]], "text": "86.0", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [227.89285278320312, 246.59507751464844, 231.2104034423828, 255.5016326904297], "spans": [[8, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [262.1841125488281, 246.59507751464844, 279.61865234375, 255.5016326904297], "spans": [[8, 4]], "text": "86.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [66.31500244140625, 234.6390838623047, 
117.38329315185547, 243.54563903808594], "spans": [[9, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [137.90625, 234.6390838623047, 150.63845825195312, 243.54563903808594], "spans": [[9, 1]], "text": "TB", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [176.57110595703125, 234.6390838623047, 194.0056610107422, 243.54563903808594], "spans": [[9, 2]], "text": "89.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [227.88845825195312, 234.6390838623047, 231.2060089111328, 243.54563903808594], "spans": [[9, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [262.1889953613281, 234.7088165283203, 279.62353515625, 243.66519165039062], "spans": [[9, 4]], "text": "89.6", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [66.31500244140625, 215.09107971191406, 117.38329315185547, 223.9976348876953], "spans": [[10, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [134.86766052246094, 215.09107971191406, 153.68701171875, 223.9976348876953], "spans": [[10, 1]], "text": "STN", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [176.57110595703125, 215.09107971191406, 194.0056610107422, 223.9976348876953], "spans": [[10, 2]], "text": "96.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [220.83494567871094, 215.09107971191406, 238.26950073242188, 
223.9976348876953], "spans": [[10, 3]], "text": "95.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [262.189697265625, 215.09107971191406, 279.6242370605469, 223.9976348876953], "spans": [[10, 4]], "text": "96.7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [308.2708740234375, 487.9087829589844, 533.3538208007812, 544.2454833984375], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3: Cell Bounding Box detection results on PubTabNet, and FinTabNet. PP: Post-processing.", "type": "table", "#-cols": 4, "#-rows": 4, "data": [[{"bbox": [339.322998046875, 529.4290771484375, 365.3353576660156, 538.3356323242188], "spans": [[0, 0]], "text": "Model", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [401.04132080078125, 529.4290771484375, 430.9191589355469, 538.3356323242188], "spans": [[0, 1]], "text": "Dataset", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [454.1021423339844, 529.4290771484375, 474.5852355957031, 538.3356323242188], "spans": [[0, 2]], "text": "mAP", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [486.54034423828125, 529.4290771484375, 527.2276000976562, 538.3356323242188], "spans": [[0, 3]], "text": "mAP (PP)", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [327.656005859375, 512.4721069335938, 377.0007629394531, 521.378662109375], "spans": [[1, 0]], "text": "EDD+BBox", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, 
"row-span": [1, 2]}, {"bbox": [393.6980895996094, 512.4721069335938, 438.2807312011719, 521.378662109375], "spans": [[1, 1]], "text": "PubTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [455.6355895996094, 512.4721069335938, 473.07012939453125, 521.378662109375], "spans": [[1, 2]], "text": "79.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [498.1659240722656, 512.4721069335938, 515.6004638671875, 521.378662109375], "spans": [[1, 3]], "text": "82.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [326.7950134277344, 500.5171203613281, 377.8633117675781, 509.4236755371094], "spans": [[2, 0]], "text": "TableFormer", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [393.6938781738281, 500.5171203613281, 438.2765197753906, 509.4236755371094], "spans": [[2, 1]], "text": "PubTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [455.6310119628906, 500.58685302734375, 473.0655517578125, 509.5432434082031], "spans": [[2, 2]], "text": "82.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [498.1712951660156, 500.58685302734375, 515.6058349609375, 509.5432434082031], "spans": [[2, 3]], "text": "86.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [326.7950134277344, 488.5621337890625, 377.8633117675781, 497.46868896484375], "spans": [[3, 0]], "text": "TableFormer", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[389.81842041015625, 488.5621337890625, 442.1519470214844, 497.46868896484375], "spans": [[3, 1]], "text": "SynthTabNet", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [455.63134765625, 488.5621337890625, 473.0658874511719, 497.46868896484375], "spans": [[3, 2]], "text": "87.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [505.22515869140625, 488.5621337890625, 508.5426940917969, 497.46868896484375], "spans": [[3, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [332.6026611328125, 148.15008544921875, 520.7051391601562, 251.47610473632812], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 4: Results of structure with content retrieved using cell detection on PubTabNet. 
In all cases the input is PDF documents with cropped tables.", "type": "table", "#-cols": 4, "#-rows": 7, "data": [[{"bbox": [358.010986328125, 230.86007690429688, 384.0233459472656, 239.76663208007812], "spans": [[0, 0]], "text": "Model", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [408.5059814453125, 224.88307189941406, 436.739990234375, 233.7896270751953], "spans": [[0, 1]], "text": "Simple", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [448.6950988769531, 224.88307189941406, 485.0784912109375, 245.74462890625], "spans": [[0, 2]], "text": "TEDS Complex", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [499.3847961425781, 224.88307189941406, 512.1170043945312, 233.7896270751953], "spans": [[0, 3]], "text": "All", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [357.6820068359375, 207.92608642578125, 384.3518981933594, 216.8326416015625], "spans": [[1, 0]], "text": "Tabula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [413.9009704589844, 207.92608642578125, 431.33551025390625, 216.8326416015625], "spans": [[1, 1]], "text": "78.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [458.164794921875, 207.92608642578125, 475.5993347167969, 216.8326416015625], "spans": [[1, 2]], "text": "57.8", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [497.0289001464844, 207.92608642578125, 514.4634399414062, 216.8326416015625], "spans": [[1, 3]], "text": "67.9", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [350.7229919433594, 195.97108459472656, 391.3106384277344, 204.8776397705078], "spans": [[2, 0]], "text": "Traprange", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [413.90582275390625, 195.97108459472656, 431.3403625488281, 204.8776397705078], "spans": [[2, 1]], "text": "60.8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [458.1696472167969, 195.97108459472656, 475.60418701171875, 204.8776397705078], "spans": [[2, 2]], "text": "49.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [497.03375244140625, 195.97108459472656, 514.4683227539062, 204.8776397705078], "spans": [[2, 3]], "text": "55.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [354.135986328125, 184.0150909423828, 387.89923095703125, 192.92164611816406], "spans": [[3, 0]], "text": "Camelot", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [413.901611328125, 184.0150909423828, 431.3361511230469, 192.92164611816406], "spans": [[3, 1]], "text": "80.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [458.1654357910156, 184.0150909423828, 475.5999755859375, 192.92164611816406], "spans": [[3, 2]], "text": "66.0", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [497.029541015625, 184.0150909423828, 514.464111328125, 192.92164611816406], "spans": [[3, 3]], "text": "73.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 
4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [346.5589904785156, 172.06008911132812, 395.475341796875, 180.96664428710938], "spans": [[4, 0]], "text": "Acrobat Pro", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [413.9061584472656, 172.06008911132812, 431.3406982421875, 180.96664428710938], "spans": [[4, 1]], "text": "68.9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [458.16998291015625, 172.06008911132812, 475.6045227050781, 180.96664428710938], "spans": [[4, 2]], "text": "61.8", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [497.0340881347656, 172.06008911132812, 514.4686279296875, 180.96664428710938], "spans": [[4, 3]], "text": "65.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [360.781005859375, 160.10508728027344, 381.254150390625, 169.0116424560547], "spans": [[5, 0]], "text": "EDD", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [413.9015808105469, 160.10508728027344, 431.33612060546875, 169.0116424560547], "spans": [[5, 1]], "text": "91.2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [458.1654052734375, 160.10508728027344, 475.5999450683594, 169.0116424560547], "spans": [[5, 2]], "text": "85.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [497.0295104980469, 160.10508728027344, 514.4640502929688, 169.0116424560547], "spans": [[5, 3]], "text": "88.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, 
"row-span": [5, 6]}], [{"bbox": [345.4830017089844, 148.15008544921875, 396.5513000488281, 157.056640625], "spans": [[6, 0]], "text": "TableFormer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [413.9061584472656, 148.15008544921875, 431.3406982421875, 157.056640625], "spans": [[6, 1]], "text": "95.4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [458.16998291015625, 148.15008544921875, 475.6045227050781, 157.056640625], "spans": [[6, 2]], "text": "90.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [497.03399658203125, 148.21981811523438, 514.4685668945312, 157.1761932373047], "spans": [[6, 3]], "text": "93.6", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [53.395973205566406, 498.96612548828125, 298.77838134765625, 573.2565307617188], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 6, "#-rows": 10, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [209.93284606933594, 565.6378784179688, 241.04458618164062, 569.8192749023438], "spans": [[0, 2], [0, 3]], "text": "\u8ad6\u6587\u30d5\u30a1\u30a4\u30eb", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [209.93284606933594, 565.6378784179688, 241.04458618164062, 569.8192749023438], "spans": [[0, 2], [0, 3]], "text": 
"\u8ad6\u6587\u30d5\u30a1\u30a4\u30eb", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [263.764892578125, 565.6378784179688, 284.5058898925781, 569.8192749023438], "spans": [[0, 4], [0, 5]], "text": "\u53c2\u8003\u6587\u732e", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [263.764892578125, 565.6378784179688, 284.5058898925781, 569.8192749023438], "spans": [[0, 4], [0, 5]], "text": "\u53c2\u8003\u6587\u732e", "type": "col_header", "col": 5, "col-header": false, "col-span": [4, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [110.24990844726562, 558.1526489257812, 120.62017822265625, 562.3340454101562], "spans": [[1, 0]], "text": "\u51fa\u5178", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [175.3660888671875, 558.1526489257812, 201.29246520996094, 562.3340454101562], "spans": [[1, 1]], "text": "\u30d5\u30a1\u30a4\u30eb \u6570", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [209.62408447265625, 558.1526489257812, 219.99435424804688, 562.3340454101562], "spans": [[1, 2]], "text": "\u82f1\u8a9e", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [229.19813537597656, 558.1526489257812, 244.75376892089844, 562.3340454101562], "spans": [[1, 3]], "text": "\u65e5\u672c\u8a9e", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [256.11419677734375, 558.1526489257812, 266.4844665527344, 562.3340454101562], "spans": [[1, 4]], "text": "\u82f1\u8a9e", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, 
"row-span": [1, 2]}, {"bbox": [278.38433837890625, 558.1526489257812, 293.9399719238281, 562.3340454101562], "spans": [[1, 5]], "text": "\u65e5\u672c\u8a9e", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [55.530521392822266, 551.2162475585938, 162.71310424804688, 555.5741577148438], "spans": [[2, 0]], "text": "Association for Computational Linguistics(ACL2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [184.39730834960938, 551.2162475585938, 189.56455993652344, 555.5741577148438], "spans": [[2, 1]], "text": "65", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [208.99026489257812, 551.2162475585938, 214.1575164794922, 555.5741577148438], "spans": [[2, 2]], "text": "65", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [234.8751678466797, 551.2162475585938, 237.4583282470703, 555.5741577148438], "spans": [[2, 3]], "text": "0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [256.88446044921875, 551.2162475585938, 264.63580322265625, 555.5741577148438], "spans": [[2, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [284.06134033203125, 551.2162475585938, 286.6445007324219, 555.5741577148438], "spans": [[2, 5]], "text": "0", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [55.530521392822266, 545.0216064453125, 139.7225341796875, 549.3795166015625], "spans": [[3, 0]], "text": "Computational Linguistics(COLING2002)", "type": "row_header", "col": 0, "col-header": false, 
"col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [183.10536193847656, 545.0216064453125, 190.85670471191406, 549.3795166015625], "spans": [[3, 1]], "text": "140", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [207.6983184814453, 545.0216064453125, 215.4496612548828, 549.3795166015625], "spans": [[3, 2]], "text": "140", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [234.8751678466797, 545.0216064453125, 237.4583282470703, 549.3795166015625], "spans": [[3, 3]], "text": "0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [256.88446044921875, 545.0216064453125, 264.63580322265625, 549.3795166015625], "spans": [[3, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [284.06134033203125, 545.0216064453125, 286.6445007324219, 549.3795166015625], "spans": [[3, 5]], "text": "0", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [55.530521392822266, 538.0201416015625, 128.96026611328125, 542.4105834960938], "spans": [[4, 0]], "text": "\u96fb\u6c17\u60c5\u5831\u901a\u4fe1\u5b66\u4f1a 2003 \u5e74\u7dcf\u5408\u5927\u4f1a", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [183.10536193847656, 538.8270263671875, 190.85670471191406, 543.1849365234375], "spans": [[4, 1]], "text": "150", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [210.2822265625, 538.8270263671875, 212.86538696289062, 543.1849365234375], "spans": [[4, 2]], "text": "8", "type": "body", "col": 2, 
"col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [232.29153442382812, 538.8270263671875, 240.04287719726562, 543.1849365234375], "spans": [[4, 3]], "text": "142", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [256.88446044921875, 538.8270263671875, 264.63580322265625, 543.1849365234375], "spans": [[4, 4]], "text": "223", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [281.4774169921875, 538.8270263671875, 289.228759765625, 543.1849365234375], "spans": [[4, 5]], "text": "147", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [55.530521392822266, 530.534912109375, 129.88177490234375, 534.9253540039062], "spans": [[5, 0]], "text": "\u60c5\u5831\u51e6\u7406\u5b66\u4f1a\u7b2c 65 \u56de\u5168\u56fd\u5927\u4f1a (2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [183.10536193847656, 531.341796875, 190.85670471191406, 535.69970703125], "spans": [[5, 1]], "text": "177", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [210.2822265625, 531.341796875, 212.86538696289062, 535.69970703125], "spans": [[5, 2]], "text": "1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [232.29153442382812, 531.341796875, 240.04287719726562, 535.69970703125], "spans": [[5, 3]], "text": "176", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [256.88446044921875, 531.341796875, 264.63580322265625, 535.69970703125], "spans": [[5, 4]], "text": "150", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [281.4774169921875, 531.341796875, 289.228759765625, 535.69970703125], "spans": [[5, 5]], "text": "236", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [55.530521392822266, 523.3078002929688, 129.88177490234375, 527.6982421875], "spans": [[6, 0]], "text": "\u7b2c 17 \u56de\u4eba\u5de5\u77e5\u80fd\u5b66\u4f1a\u5168\u56fd\u5927\u4f1a (2003)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [183.10536193847656, 524.1146850585938, 190.85670471191406, 528.4725952148438], "spans": [[6, 1]], "text": "208", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [210.2822265625, 524.1146850585938, 212.86538696289062, 528.4725952148438], "spans": [[6, 2]], "text": "5", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [232.29153442382812, 524.1146850585938, 240.04287719726562, 528.4725952148438], "spans": [[6, 3]], "text": "203", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [256.88446044921875, 524.1146850585938, 264.63580322265625, 528.4725952148438], "spans": [[6, 4]], "text": "152", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [281.4774169921875, 524.1146850585938, 289.228759765625, 528.4725952148438], "spans": [[6, 5]], "text": "244", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [55.530521392822266, 516.0807495117188, 127.32453918457031, 520.47119140625], "spans": [[7, 0]], "text": 
"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u7814\u7a76\u4f1a\u7b2c 146 \u301c 155 \u56de", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [184.39730834960938, 516.8876342773438, 189.56455993652344, 521.2455444335938], "spans": [[7, 1]], "text": "98", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [210.2822265625, 516.8876342773438, 212.86538696289062, 521.2455444335938], "spans": [[7, 2]], "text": "2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [233.58348083496094, 516.8876342773438, 238.750732421875, 521.2455444335938], "spans": [[7, 3]], "text": "96", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [256.88446044921875, 516.8876342773438, 264.63580322265625, 521.2455444335938], "spans": [[7, 4]], "text": "150", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [281.4774169921875, 516.8876342773438, 289.228759765625, 521.2455444335938], "spans": [[7, 5]], "text": "232", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [55.530521392822266, 508.59564208984375, 110.16829681396484, 512.986083984375], "spans": [[8, 0]], "text": "WWW \u304b\u3089\u53ce\u96c6\u3057\u305f\u8ad6\u6587", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [183.10536193847656, 509.6605224609375, 190.85670471191406, 514.0184326171875], "spans": [[8, 1]], "text": "107", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [208.99026489257812, 509.6605224609375, 
214.1575164794922, 514.0184326171875], "spans": [[8, 2]], "text": "73", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [233.58348083496094, 509.6605224609375, 238.750732421875, 514.0184326171875], "spans": [[8, 3]], "text": "34", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [256.88446044921875, 509.6605224609375, 264.63580322265625, 514.0184326171875], "spans": [[8, 4]], "text": "147", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [282.7693786621094, 509.6605224609375, 287.9366149902344, 514.0184326171875], "spans": [[8, 5]], "text": "96", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": null, "spans": [[9, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [183.10536193847656, 502.1754150390625, 190.85670471191406, 506.5333251953125], "spans": [[9, 1]], "text": "945", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [207.6983184814453, 502.1754150390625, 215.4496612548828, 506.5333251953125], "spans": [[9, 2]], "text": "294", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [232.29153442382812, 502.1754150390625, 240.04287719726562, 506.5333251953125], "spans": [[9, 3]], "text": "651", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [255.7650604248047, 502.1754150390625, 265.7520446777344, 506.5333251953125], "spans": [[9, 4]], "text": "1122", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], 
"row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [281.4774169921875, 502.1754150390625, 289.228759765625, 506.5333251953125], "spans": [[9, 5]], "text": "955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [304.5496826171875, 504.4200439453125, 550.3656005859375, 573.4367065429688], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "Text is aligned to match original for ease of viewing", "type": "table", "#-cols": 5, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [392.0967102050781, 565.3603515625, 438.0144958496094, 570.425537109375], "spans": [[0, 1], [0, 2]], "text": "Shares (in millions)", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [392.0967102050781, 565.3603515625, 438.0144958496094, 570.425537109375], "spans": [[0, 1], [0, 2]], "text": "Shares (in millions)", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [459.0486145019531, 559.1006469726562, 542.0001831054688, 570.3758544921875], "spans": [[0, 3], [0, 4]], "text": "Weighted Average Grant Date Fair Value", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [459.0486145019531, 559.1006469726562, 542.0001831054688, 570.3758544921875], "spans": [[0, 3], [0, 4]], "text": "Weighted Average Grant Date Fair Value", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 
1, "row-header": false, "row-span": [1, 2]}, {"bbox": [393.24420166015625, 550.1876831054688, 407.3463134765625, 555.2528686523438], "spans": [[1, 1]], "text": "RS U s", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.1832275390625, 550.1876831054688, 440.98779296875, 555.2528686523438], "spans": [[1, 2]], "text": "PSUs", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [468.3825378417969, 550.1876831054688, 482.4846496582031, 555.2528686523438], "spans": [[1, 3]], "text": "RSUs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [516.92578125, 550.1876831054688, 530.7303466796875, 555.2528686523438], "spans": [[1, 4]], "text": "PSUs", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [306.11492919921875, 542.323974609375, 364.65606689453125, 547.38916015625], "spans": [[2, 0]], "text": "Nonvested on Janua ry 1", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.2466125488281, 542.0215454101562, 403.75531005859375, 547.0867309570312], "spans": [[2, 1]], "text": "1. 
1", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.8183898925781, 542.0215454101562, 437.32708740234375, 547.0867309570312], "spans": [[2, 2]], "text": "0.3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [465.5285949707031, 542.0215454101562, 483.5500183105469, 547.0867309570312], "spans": [[2, 3]], "text": "90.10 $", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [513.4482421875, 542.0215454101562, 531.4696655273438, 547.0867309570312], "spans": [[2, 4]], "text": "$ 91.19", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [306.11492919921875, 533.2503051757812, 325.6267395019531, 538.3154907226562], "spans": [[3, 0]], "text": "Granted", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [396.2466125488281, 533.2503051757812, 403.75531005859375, 538.3154907226562], "spans": [[3, 1]], "text": "0. 
5", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.8183898925781, 533.2503051757812, 437.32708740234375, 538.3154907226562], "spans": [[3, 2]], "text": "0.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [466.435791015625, 533.2503051757812, 482.5483093261719, 538.3154907226562], "spans": [[3, 3]], "text": "117.44", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [514.2906494140625, 533.2503051757812, 530.809814453125, 538.3154907226562], "spans": [[3, 4]], "text": "122.41", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [306.11492919921875, 525.3865966796875, 322.628662109375, 530.4517822265625], "spans": [[4, 0]], "text": "Vested", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [394.4322204589844, 525.3865966796875, 405.5362548828125, 530.4517822265625], "spans": [[4, 1]], "text": "(0. 
5 )", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [427.70159912109375, 525.3865966796875, 438.8056335449219, 530.4517822265625], "spans": [[4, 2]], "text": "(0.1)", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [468.5553283691406, 525.3865966796875, 482.0704345703125, 530.4517822265625], "spans": [[4, 3]], "text": "87.08", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [516.0186157226562, 525.3865966796875, 529.5337524414062, 530.4517822265625], "spans": [[4, 4]], "text": "81.14", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [306.11492919921875, 517.2933349609375, 356.2477111816406, 522.3585205078125], "spans": [[5, 0]], "text": "Canceled or forfeited", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.4322204589844, 516.6153564453125, 405.5362548828125, 521.6805419921875], "spans": [[5, 1]], "text": "(0. 
1 )", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [431.02801513671875, 516.6153564453125, 436.4280090332031, 521.6805419921875], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [465.83099365234375, 516.6153564453125, 482.3501281738281, 521.6805419921875], "spans": [[5, 3]], "text": "102.01", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [516.0186157226562, 516.6153564453125, 529.5337524414062, 521.6805419921875], "spans": [[5, 4]], "text": "92.18", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [306.11492919921875, 508.4490661621094, 373.3576354980469, 513.5142822265625], "spans": [[6, 0]], "text": "Nonvested on December 31", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [396.2466125488281, 508.4490661621094, 403.75531005859375, 513.5142822265625], "spans": [[6, 1]], "text": "1.0", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [429.5159912109375, 508.4490661621094, 437.0246887207031, 513.5142822265625], "spans": [[6, 2]], "text": "0.3", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [463.7142028808594, 508.4490661621094, 484.7396545410156, 513.5142822265625], "spans": [[6, 3]], "text": "104.85 $", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [512.99462890625, 508.4490661621094, 534.0200805664062, 513.5142822265625], "spans": [[6, 4]], "text": "$ 104.51", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/2206.01062.doctags.txt b/tests/data/2206.01062.doctags.txt new file mode 100644 index 00000000..497b709b --- /dev/null +++ b/tests/data/2206.01062.doctags.txt @@ -0,0 +1,237 @@ + +DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis +Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com +Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com +Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com +Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com +Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com +ABSTRACT +Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. 
While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis. +CCS CONCEPTS +· Information systems → Document structure ; · Applied computing → Document analysis ; · Computing methodologies → Machine learning ; Computer vision ; Object detection ; +Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). +KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). 
ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043 +Figure 1: Four examples of complex page layouts across different document categories +
+ +Figure 1: Four examples of complex page layouts across different document categories +
+KEYWORDS +PDF document conversion, layout segmentation, object-detection, data set, Machine Learning +ACM Reference Format: +Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043 +1 INTRODUCTION +Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1. +A key problem in the process of document conversion is to understand the structure of a single document page, i.e. which segments of text should be grouped together in a unit. To train models for this task, there are currently two large datasets available to the community, PubLayNet [6] and DocBank [7]. They were introduced in 2019 and 2020 respectively and significantly accelerated the implementation of layout detection and segmentation models due to their sizes of 300K and 500K ground-truth pages. These sizes were achieved by leveraging an automation approach. The benefit of automated ground-truth generation is obvious: one can generate large ground-truth datasets at virtually no cost. 
However, the automation introduces a constraint on the variability in the dataset, because corresponding structured source data must be available. PubLayNet and DocBank were both generated from scientific document repositories (PubMed and arXiv), which provide XML or L A T E X sources. Those scientific documents present a limited variability in their layouts, because they are typeset in uniform templates provided by the publishers. Obviously, documents such as technical manuals, annual company reports, legal text, government tenders, etc. have very different and partially unique layouts. As a consequence, the layout predictions obtained from models trained on PubLayNet or DocBank is very reasonable when applied on scientific documents. However, for more artistic or free-style layouts, we see sub-par prediction quality from these models, which we demonstrate in Section 5. +In this paper, we present the DocLayNet dataset. It provides pageby-page layout annotation ground-truth using bounding-boxes for 11 distinct class labels on 80863 unique document pages, of which a fraction carry double- or triple-annotations. DocLayNet is similar in spirit to PubLayNet and DocBank and will likewise be made available to the public 1 in order to stimulate the document-layout analysis community. It distinguishes itself in the following aspects: +(1) Human Annotation : In contrast to PubLayNet and DocBank, we relied on human annotation instead of automation approaches to generate the data set. +(2) Large Layout Variability : We include diverse and complex layouts from a large variety of public sources. +(3) Detailed Label Set : We define 11 class labels to distinguish layout features in high detail. PubLayNet provides 5 labels; DocBank provides 13, although not a superset of ours. +(4) Redundant Annotations : A fraction of the pages in the DocLayNet data set carry more than one human annotation. +This enables experimentation with annotation uncertainty and quality control analysis. 
+(5) Pre-defined Train-, Test- & Validation-set : Like DocBank, we provide fixed train-, test- & validation-sets to ensure proportional representation of the class-labels. Further, we prevent leakage of unique layouts across sets, which has a large effect on model accuracy scores. +All aspects outlined above are detailed in Section 3. In Section 4, we will elaborate on how we designed and executed this large-scale human annotation campaign. We will also share key insights and lessons learned that might prove helpful for other parties planning to set up annotation campaigns. +In Section 5, we will present baseline accuracy numbers for a variety of object detection methods (Faster R-CNN, Mask R-CNN and YOLOv5) trained on DocLayNet. We further show how the model performance is impacted by varying the DocLayNet dataset size, reducing the label set and modifying the train/test-split. Last but not least, we compare the performance of models trained on PubLayNet, DocBank and DocLayNet and demonstrate that a model trained on DocLayNet provides overall more robust layout recovery. +2 RELATED WORK +While early approaches in document-layout analysis used rulebased algorithms and heuristics [8], the problem is lately addressed with deep learning methods. The most common approach is to leverage object detection models [9-15]. In the last decade, the accuracy and speed of these models has increased dramatically. Furthermore, most state-of-the-art object detection methods can be trained and applied with very little work, thanks to a standardisation effort of the ground-truth data format [16] and common deep-learning frameworks [17]. Reference data sets such as PubLayNet [6] and DocBank provide their data in the commonly accepted COCO format [16]. +Lately, new types of ML models for document-layout analysis have emerged in the community [18-21]. 
These models do not approach the problem of layout analysis purely based on an image representation of the page, as computer vision methods do. Instead, they combine the text tokens and image representation of a page in order to obtain a segmentation. While the reported accuracies appear to be promising, a broadly accepted data format which links geometric and textual features has yet to establish. +3 THE DOCLAYNET DATASET +DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape of labeled, rectangular boundingboxes. We define 11 distinct labels for layout features, namely Caption , Footnote , Formula , List-item , Page-footer , Page-header , Picture , Section-header , Table , Text , and Title . Our reasoning for picking this particular label set is detailed in Section 4. +In addition to open intellectual property constraints for the source documents, we required that the documents in DocLayNet adhere to a few conditions. Firstly, we kept scanned documents +Figure 2: Distribution of DocLayNet pages across document categories. +
+ +Figure 2: Distribution of DocLayNet pages across document categories. +
+to a minimum, since they introduce difficulties in annotation (see Section 4). As a second condition, we focussed on medium to large documents ( > 10 pages) with technical content, dense in complex tables, figures, plots and captions. Such documents carry a lot of information value, but are often hard to analyse with high accuracy due to their challenging layouts. Counterexamples of documents not included in the dataset are receipts, invoices, hand-written documents or photographs showing "text in the wild". +The pages in DocLayNet can be grouped into six distinct categories, namely Financial Reports , Manuals , Scientific Articles , Laws & Regulations , Patents and Government Tenders . Each document category was sourced from various repositories. For example, Financial Reports contain both free-style format annual reports 2 which expose company-specific, artistic layouts as well as the more formal SEC filings. The two largest categories ( Financial Reports and Manuals ) contain a large amount of free-style layouts in order to obtain maximum variability. In the other four categories, we boosted the variability by mixing documents from independent providers, such as different government websites or publishers. In Figure 2, we show the document categories contained in DocLayNet with their respective sizes. +We did not control the document selection with regard to language. The vast majority of documents contained in DocLayNet (close to 95%) are published in English language. However, DocLayNet also contains a number of documents in other languages such as German (2.5%), French (1.0%) and Japanese (1.0%). While the document language has negligible impact on the performance of computer vision methods such as object detection and segmentation models, it might prove challenging for layout analysis methods which exploit textual features. 
+To ensure that future benchmarks in the document-layout analysis community can be easily compared, we have split up DocLayNet into pre-defined train-, test- and validation-sets. In this way, we can avoid spurious variations in the evaluation scores due to random splitting in train-, test- and validation-sets. We also ensured that less frequent labels are represented in train and test sets in equal proportions. +Table 1 shows the overall frequency and distribution of the labels among the different sets. Importantly, we ensure that subsets are only split on full-document boundaries. This avoids that pages of the same document are spread over train, test and validation set, which can give an undesired evaluation advantage to models and lead to overestimation of their prediction accuracy. We will show the impact of this decision in Section 5. +In order to accommodate the different types of models currently in use by the community, we provide DocLayNet in an augmented COCO format [16]. This entails the standard COCO ground-truth file (in JSON format) with the associated page images (in PNG format, 1025 × 1025 pixels). Furthermore, custom fields have been added to each COCO record to specify document category, original document filename and page number. In addition, we also provide the original PDF pages, as well as sidecar files containing parsed PDF text and text-cell coordinates (in JSON). All additional files are linked to the primary page images by their matching filenames. +Despite being cost-intense and far less scalable than automation, human annotation has several benefits over automated groundtruth generation. The first and most obvious reason to leverage human annotations is the freedom to annotate any type of document without requiring a programmatic source. For most PDF documents, the original source document is not available. The latter is not a hard constraint with human annotation, but it is for automated methods. 
A second reason to use human annotations is that the latter usually provide a more natural interpretation of the page layout. The human-interpreted layout can significantly deviate from the programmatic layout used in typesetting. For example, "invisible" tables might be used solely for aligning text paragraphs on columns. Such typesetting tricks might be interpreted by automated methods incorrectly as an actual table, while the human annotation will interpret it correctly as Text or other styles. The same applies to multi-line text elements, when authors decided to space them as "invisible" list elements without bullet symbols. A third reason to gather ground-truth through human annotation is to estimate a "natural" upper bound on the segmentation accuracy. As we will show in Section 4, certain documents featuring complex layouts can have different but equally acceptable layout interpretations. This natural upper bound for segmentation accuracy can be found by annotating the same pages multiple times by different people and evaluating the inter-annotator agreement. Such a baseline consistency evaluation is very useful to define expectations for a good target accuracy in trained deep neural network models and avoid overfitting (see Table 1). On the flip side, achieving high annotation consistency proved to be a key challenge in human annotation, as we outline in Section 4. +4 ANNOTATION CAMPAIGN +The annotation campaign was carried out in four phases. In phase one, we identified and prepared the data sources for annotation. In phase two, we determined the class labels and how annotations should be done on the documents in order to obtain maximum consistency. The latter was guided by a detailed requirement analysis and exhaustive experiments. In phase three, we trained the annotation staff and performed exams for quality assurance. In phase four, +Table 1: DocLayNet dataset overview. 
Along with the frequency of each class label, we present the relative occurrence (as % of row "Total") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. + + + +% of Total% of Total% of Totaltriple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%)triple inter-annotator mAP @ 0.5-0.95 (%) +class labelCountTrainTestValAllFinManSciLawPatTen +Caption225242.041.772.3284-8940-6186-9294-9995-9969-78n/a +Footnote63180.600.310.5883-91n/a10062-8885-94n/a82-97 +Formula250272.251.902.9683-85n/an/a84-8786-96n/an/a +List-item18566017.1913.3415.8287-8874-8390-9297-9781-8575-8893-95 +Page-footer708786.515.586.0093-9488-9095-9610092-9710096-98 +Page-header580225.106.705.0685-8966-7690-9498-10091-9297-9981-86 +Picture459764.212.785.3169-7156-5982-8669-8280-9566-7159-76 +Section-header14288412.6015.7712.8583-8476-8190-9294-9587-9469-7378-86 +Table347333.202.273.6077-8175-8083-8698-9958-8079-8470-85 +Text51037745.8249.2845.0084-8681-8688-9389-9387-9271-7987-95 +Title50710.470.300.5060-7224-6350-6394-10082-9668-7924-56 +Total1107470941123998166653182-8371-7479-8189-9486-9171-7668-85 +
Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row "Total") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.
+Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right. +
+ +Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right. +
+we distributed the annotation workload and performed continuous quality controls. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised. +Phase 1: Data selection and preparation. Our inclusion criteria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources +include publication repositories such as arXiv$^{3}$, government offices, company websites as well as data directory services for financial reports and patents. Scanned documents were excluded wherever possible because they can be rotated or skewed. This would not allow us to perform annotation with rectangular bounding-boxes and therefore complicate the annotation process. +Preparation work included uploading and parsing the sourced PDF documents in the Corpus Conversion Service (CCS) [22], a cloud-native platform which provides a visual annotation interface and allows for dataset inspection and analysis. The annotation interface of CCS is shown in Figure 3. The desired balance of pages between the different document categories was achieved by selective subsampling of pages with certain desired properties. For example, we made sure to include the title page of each document and bias the remaining page selection to those with figures or tables. The latter was achieved by leveraging pre-trained object detection models from PubLayNet, which helped us estimate how many figures and tables a given page contains. +Phase 2: Label selection and guideline. We reviewed the collected documents and identified the most common structural features they exhibit. This was achieved by identifying recurrent layout elements and lead us to the definition of 11 distinct class labels. These 11 class labels are Caption , Footnote , Formula , List-item , Pagefooter , Page-header , Picture , Section-header , Table , Text , and Title . 
Critical factors that were considered for the choice of these class labels were (1) the overall occurrence of the label, (2) the specificity of the label, (3) recognisability on a single page (i.e. no need for context from previous or next page) and (4) overall coverage of the page. Specificity ensures that the choice of label is not ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and Affiliation , as seen in DocBank, are often only distinguishable by discriminating on +the textual content of an element, which goes beyond visual layout recognition, in particular outside the Scientific Articles category. +At first sight, the task of visual document-layout interpretation appears intuitive enough to obtain plausible annotations in most cases. However, during early trial-runs in the core team, we observed many cases in which annotators use different annotation styles, especially for documents with challenging layouts. For example, if a figure is presented with subfigures, one annotator might draw a single figure bounding-box, while another might annotate each subfigure separately. The same applies for lists, where one might annotate all list items in one block or each list item separately. In essence, we observed that challenging layouts would be annotated in different but plausible ways. To illustrate this, we show in Figure 4 multiple examples of plausible but inconsistent annotations on the same pages. +Obviously, this inconsistency in annotations is not desirable for datasets which are intended to be used for model training. To minimise these inconsistencies, we created a detailed annotation guideline. 
While perfect consistency across 40 annotation staff members is clearly not possible to achieve, we saw a huge improvement in annotation consistency after the introduction of our annotation guideline. A few selected, non-trivial highlights of the guideline are: +(1) Every list-item is an individual object instance with class label List-item . This definition is different from PubLayNet and DocBank, where all list-items are grouped together into one List object. +(2) A List-item is a paragraph with hanging indentation. Singleline elements can qualify as List-item if the neighbour elements expose hanging indentation. Bullet or enumeration symbols are not a requirement. +(3) For every Caption , there must be exactly one corresponding Picture or Table . +(4) Connected sub-pictures are grouped together in one Picture object. +(5) Formula numbers are included in a Formula object. +(6) Emphasised text (e.g. in italic or bold) at the beginning of a paragraph is not considered a Section-header , unless it appears exclusively on its own line. +The complete annotation guideline is over 100 pages long and a detailed description is obviously out of scope for this paper. Nevertheless, it will be made publicly available alongside with DocLayNet for future reference. +Phase 3: Training. After a first trial with a small group of people, we realised that providing the annotation guideline and a set of random practice pages did not yield the desired quality level for layout annotation. Therefore we prepared a subset of pages with two different complexity levels, each with a practice and an exam part. 974 pages were reference-annotated by one proficient core team member. Annotation staff were then given the task to annotate the same subsets (blinded from the reference). By comparing the annotations of each staff member with the reference annotations, we could quantify how closely their annotations matched the reference. 
Only after passing two exam levels with high annotation quality, staff were admitted into the production phase. Practice iterations +Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous. +
+ +Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous. +
+were carried out over a timeframe of 12 weeks, after which 8 of the 40 initially allocated annotators did not pass the bar. +Phase 4: Production annotation. The previously selected 80K pages were annotated with the defined 11 class labels by 32 annotators. This production phase took around three months to complete. All annotations were created online through CCS, which visualises the programmatic PDF text-cells as an overlay on the page. The page annotation are obtained by drawing rectangular bounding-boxes, as shown in Figure 3. With regard to the annotation practices, we implemented a few constraints and capabilities on the tooling level. First, we only allow non-overlapping, vertically oriented, rectangular boxes. For the large majority of documents, this constraint was sufficient and it speeds up the annotation considerably in comparison with arbitrary segmentation shapes. Second, annotator staff were not able to see each other's annotations. This was enforced by design to avoid any bias in the annotation, which could skew the numbers of the inter-annotator agreement (see Table 1). We wanted +Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset. 
+ + + +humanMRCNNMRCNNFRCNNYOLO +humanR50R101R101v5x6 +Caption84-8968.471.570.177.7 +Footnote83-9170.971.873.777.2 +Formula83-8560.163.463.566.2 +List-item87-8881.280.881.086.2 +Page-footer93-9461.659.358.961.1 +Page-header85-8971.970.072.067.9 +Picture69-7171.772.772.077.1 +Section-header83-8467.669.368.474.6 +Table77-8182.282.982.286.3 +Text84-8684.685.885.488.1 +Title60-7276.780.479.982.7 +All82-8372.473.573.476.8 +
Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.
+to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and Picture . For the latter, we instructed annotation staff to minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way to flag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in the final dataset. With all these measures in place, experienced annotation staff managed to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity. +5 EXPERIMENTS +The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. 
As such, we will relate to these object detection methods in this +Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions. +
+ +Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions. +
+paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work. +In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16]. +Baselines for Object Detection +In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of 1025 × 1025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as Text , Table and Picture . This is not entirely surprising, as Text , Table and Picture are abundant and the most visually distinctive in a document. +Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. The reduced label sets were obtained by either down-mapping or dropping labels. 
+ + + +Class-count11654 +Caption68TextTextText +Footnote71TextTextText +Formula60TextTextText +List-item81Text82Text +Page-footer6262-- +Page-header7268-- +Picture72727272 +Section-header68676968 +Table82838282 +Text85848484 +Title77Sec.-h.Sec.-h.Sec.-h. +Overall72737877 +
Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. The reduced label sets were obtained by either down-mapping or dropping labels.
+Learning Curve +One of the fundamental questions related to any dataset is if it is "large enough". To answer this question for DocLayNet, we performed a data ablation study in which we evaluated a Mask R-CNN model trained on increasing fractions of the DocLayNet dataset. As can be seen in Figure 5, the mAP score rises sharply in the beginning and eventually levels out. To estimate the error-bar on the metrics, we ran the training five times on the entire data-set. This resulted in a 1% error-bar, depicted by the shaded area in Figure 5. In the inset of Figure 5, we show the exact same data-points, but with a logarithmic scale on the x-axis. As is expected, the mAP score increases linearly as a function of the data-size in the inset. The curve ultimately flattens out between the 80% and 100% mark, with the 80% mark falling within the error-bars of the 100% mark. This provides a good indication that the model would not improve significantly by yet increasing the data size. Rather, it would probably benefit more from improved data consistency (as discussed in Section 3), data augmentation methods [23], or the addition of more document categories and styles. +Impact of Class Labels +The choice and number of labels can have a significant effect on the overall model performance. Since PubLayNet, DocBank and DocLayNet all have different label sets, it is of particular interest to understand and quantify this influence of the label set on the model performance. We investigate this by either down-mapping labels into more common ones (e.g. Caption → Text ) or excluding them from the annotations entirely. Furthermore, it must be stressed that all mappings and exclusions were performed on the data before model training. In Table 3, we present the mAP scores for a Mask R-CNN R50 network on different label sets. Where a label is down-mapped, we show its corresponding label, otherwise it was excluded. 
We present three different label sets, with 6, 5 and 4 different labels respectively. The set of 5 labels contains the same labels as PubLayNet. However, due to the different definition of +Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement. + + + +Class-count111155 +SplitDocPageDocPage +Caption6883 +Footnote7184 +Formula6066 +List-item81888288 +Page-footer6289 +Page-header7290 +Picture72827282 +Section-header68836983 +Table82898290 +Text85918490 +Title7781 +All72847887 +
Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement.
+lists in PubLayNet (grouped list-items) versus DocLayNet (separate list-items), the label set of size 4 is the closest to PubLayNet, in the assumption that the List is down-mapped to Text in PubLayNet. The results in Table 3 show that the prediction accuracy on the remaining class labels does not change significantly when other classes are merged into them. The overall macro-average improves by around 5%, in particular when Page-footer and Page-header are excluded. +Impact of Document Split in Train and Test Set +Many documents in DocLayNet have a unique styling. In order to avoid overfitting on a particular style, we have split the train-, test- and validation-sets of DocLayNet on document boundaries, i.e. every document contributes pages to only one set. To the best of our knowledge, this was not considered in PubLayNet or DocBank. To quantify how this affects model performance, we trained and evaluated a Mask R-CNN R50 model on a modified dataset version. Here, the train-, test- and validation-sets were obtained by a randomised draw over the individual pages. As can be seen in Table 4, the difference in model performance is surprisingly large: pagewise splitting gains ˜ 10% in mAP over the document-wise splitting. Thus, random page-wise splitting of DocLayNet can easily lead to accidental overestimation of model performance and should be avoided. +Dataset Comparison +Throughout this paper, we claim that DocLayNet's wider variety of document layouts leads to more robust layout detection models. In Table 5, we provide evidence for that. We trained models on each of the available datasets (PubLayNet, DocBank and DocLayNet) and evaluated them on the test sets of the other datasets. Due to the different label sets and annotation styles, a direct comparison is not possible. Hence, we focussed on the common labels among the datasets. 
Between PubLayNet and DocLayNet, these are Picture , +Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets. + + + +Testing onTesting onTesting on +labelsPLNDBDLN +Figure964323 +Sec-header87-32 +Table952449 +Text96-42 +total933430 +Figure777131 +Table196522 +total486827 +Figure675172 +Sec-header53-68 +Table874382 +Text77-84 +total594778 +
Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.
+Section-header , Table and Text . Before training, we either mapped or excluded DocLayNet's other labels as specified in table 3, and also PubLayNet's List to Text . Note that the different clustering of lists (by list-element vs. whole list objects) naturally decreases the mAP score for Text . +For comparison of DocBank with DocLayNet, we trained only on Picture and Table clusters of each dataset. We had to exclude Text because successive paragraphs are often grouped together into a single object in DocBank. This paragraph grouping is incompatible with the individual paragraphs of DocLayNet. As can be seen in Table 5, DocLayNet trained models yield better performance compared to the previous datasets. It is noteworthy that the models trained on PubLayNet and DocBank perform very well on their own test set, but have a much lower performance on the foreign datasets. While this also applies to DocLayNet, the difference is far less pronounced. Thus we conclude that DocLayNet trained models are overall more robust and will produce better results for challenging, unseen layouts. +Example Predictions +To conclude this section, we illustrate the quality of layout predictions one can expect from DocLayNet-trained models by providing a selection of examples without any further post-processing applied. Figure 6 shows selected layout predictions on pages from the test-set of DocLayNet. Results look decent in general across document categories, however one can also observe mistakes such as overlapping clusters of different classes, or entirely missing boxes due to low confidence. +6 CONCLUSION +In this paper, we presented the DocLayNet dataset. It provides the document conversion and layout analysis research community a new and challenging dataset to improve and fine-tune novel ML methods on. In contrast to many other datasets, DocLayNet was created by human annotation in order to obtain reliable layout ground-truth on a wide variety of publication- and typesettingstyles. 
Including a large proportion of documents outside the scientific publishing domain adds significant value in this respect. +From the dataset, we have derived on the one hand reference metrics for human performance on document-layout annotation (through double and triple annotations) and on the other hand evaluated the baseline performance of commonly used object detection methods. We also illustrated the impact of various dataset-related aspects on model performance through data-ablation experiments, both from a size and class-label perspective. Last but not least, we compared the accuracy of models trained on other public datasets and showed that DocLayNet trained models are more robust. +To date, there is still a significant gap between human and ML accuracy on the layout interpretation task, and we hope that this work will inspire the research community to close that gap. +REFERENCES +[1] Max Göbel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013. +[2] Christian Clausner, Apostolos Antonacopoulos, and Stefan Pletschacher. Icdar2017 competition on recognition of documents with complex layouts rdcl2017. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 1404-1410, 2017. +[3] Hervé Déjean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), April 2019. http://sac.founderit.com/. +[4] Antonio Jimeno Yepes, Peter Zhong, and Douglas Burdick. Competition on scientific literature parsing. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 605-617. LNCS 12824, SpringerVerlag, sep 2021. +[5] Logan Markewich, Hao Zhang, Yubin Xing, Navid Lambert-Shirzad, Jiang Zhexin, Roy Lee, Zhi Li, and Seok-Bum Ko. 
Segmentation for document layout analysis: not dead yet. International Journal on Document Analysis and Recognition (IJDAR) , pages 1-11, 01 2022. +[6] Xu Zhong, Jianbin Tang, and Antonio Jimeno-Yepes. Publaynet: Largest dataset ever for document layout analysis. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 1015-1022, sep 2019. +[7] Minghao Li, Yiheng Xu, Lei Cui, Shaohan Huang, Furu Wei, Zhoujun Li, and Ming Zhou. Docbank: A benchmark dataset for document layout analysis. In Proceedings of the 28th International Conference on Computational Linguistics , COLING, pages 949-960. International Committee on Computational Linguistics, dec 2020. +[8] Riaz Ahmad, Muhammad Tanvir Afzal, and M. Qadir. Information extraction from pdf sources based on rule-based system using integrated formats. In SemWebEval@ESWC , 2016. +[9] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE Conference on Computer Vision and Pattern Recognition , CVPR, pages 580-587. IEEE Computer Society, jun 2014. +[10] Ross B. Girshick. Fast R-CNN. In 2015 IEEE International Conference on Computer Vision , ICCV, pages 1440-1448. IEEE Computer Society, dec 2015. +[11] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence , 39(6):1137-1149, 2017. +[12] Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross B. Girshick. Mask R-CNN. In IEEE International Conference on Computer Vision , ICCV, pages 2980-2988. IEEE Computer Society, Oct 2017. 
+[13] Glenn Jocher, Alex Stoken, Ayush Chaurasia, Jirka Borovec, NanoCode012, TaoXie, Yonghye Kwon, Kalen Michael, Liu Changyu, Jiacong Fang, Abhiram V, Laughing, tkianai, yxNONG, Piotr Skalski, Adam Hogan, Jebastin Nadar, imyhxy, Lorenzo Mammana, Alex Wang, Cristi Fati, Diego Montes, Jan Hajek, Laurentiu +Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes. +
+ +Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes. +
+Diaconu, Mai Thanh Minh, Marc, albinxavi, fatih, oleg, and wanghao yang. ultralytics/yolov5: v6.0 - yolov5n nano models, roboflow integration, tensorflow export, opencv dnn support, October 2021. +[14] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to-end object detection with transformers. CoRR , abs/2005.12872, 2020. +[15] Mingxing Tan, Ruoming Pang, and Quoc V. Le. Efficientdet: Scalable and efficient object detection. CoRR , abs/1911.09070, 2019. +[16] Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollár, and C. Lawrence Zitnick. Microsoft COCO: common objects in context, 2014. +[17] Yuxin Wu, Alexander Kirillov, Francisco Massa, Wan-Yen Lo, and Ross Girshick. Detectron2, 2019. +[18] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter W. J. Staar. Robust pdf document conversion using recurrent neural networks. In Proceedings of the 35th Conference on Artificial Intelligence , AAAI, pages 1513715145, feb 2021. +[19] Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. Layoutlm: Pre-training of text and layout for document image understanding. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 1192-1200, New York, USA, 2020. Association for Computing Machinery. +[20] Shoubin Li, Xuyan Ma, Shuaiqun Pan, Jun Hu, Lin Shi, and Qing Wang. Vtlayout: Fusion of visual and text features for document layout analysis, 2021. +[21] Peng Zhang, Can Li, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Fei Wu. Vsr: A unified framework for document layout analysis combining vision, semantics and relations, 2021. +[22] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 
Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 774-782. ACM, 2018. +[23] Connor Shorten and Taghi M. Khoshgoftaar. A survey on image data augmentation for deep learning. Journal of Big Data , 6(1):60, 2019. +
\ No newline at end of file diff --git a/tests/data/2206.01062.json b/tests/data/2206.01062.json index 0786a34d..32114c4d 100644 --- a/tests/data/2206.01062.json +++ b/tests/data/2206.01062.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2206.01062.pdf", "filename-prov": null, "document-hash": "5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc", "#-pages": 9, "collection-name": null, "description": null, "page-hashes": [{"hash": "3c76b6d3fd82865e42c51d5cbd7d1a9996dba7902643b919acc581e866b92716", "model": "default", "page": 1}, {"hash": "5ccfaddd314d3712cbabc857c8c0f33d1268341ce37b27089857cbf09f0522d4", "model": "default", "page": 2}, {"hash": "d2dc51ad0a01ee9486ffe248649ee1cd10ce35773de8e4b21abf30d310f4fc26", "model": "default", "page": 3}, {"hash": "310121977375f8f1106412189943bd70f121629b2b4d35394077233dedbfb041", "model": "default", "page": 4}, {"hash": "09fa72b602eb0640669844acabc17ef494802a4a9188aeaaf0e0131c496e6951", "model": "default", "page": 5}, {"hash": "ec3fa60f136f3d9f5fa790ab27f5d1c14e5622573c52377b909b591d0be0ea44", "model": "default", "page": 6}, {"hash": "ec1bc56fe581ce95615b1fab11c3ba8fc89662acf2f53446decd380a155b06dd", "model": "default", "page": 7}, {"hash": "fbd2b06876dddc19ee08e0a9751d978c03e6943b74bedf1d83d6528cd4f8954d", "model": "default", "page": 8}, {"hash": "6cfa4eb4410fa9972da289dbf8d8cc585d317a192e1214c778ddd7768e98f311", "model": "default", "page": 9}]}, "main-text": [{"text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": 
"subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [107.30000305175781, 672.3833618164062, 505.1857604980469, 709.082275390625], "page": 1, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [90.94670867919922, 611.2825317382812, 193.91998291015625, 658.7803344726562], "page": 1, "span": [0, 73], "__ref_s3_data": null}]}, {"text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [254.97935485839844, 611.7597045898438, 357.8802490234375, 658.7174072265625], "page": 1, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [419.0672302246094, 611.7597045898438, 522.0595703125, 658.9878540039062], "page": 1, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [171.90907287597656, 553.3746948242188, 275.3072509765625, 600.1580200195312], "page": 1, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [336.5292053222656, 553.3746948242188, 439.84405517578125, 599.942626953125], "page": 1, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "ABSTRACT", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.33011245727539, 533.9879760742188, 112.2127456665039, 544.47509765625], "page": 1, "span": [0, 8], "__ref_s3_data": null}]}, {"text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. 
With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. 
Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.857933044433594, 257.10565185546875, 295.5601806640625, 529.5941162109375], "page": 1, "span": [0, 1595], "__ref_s3_data": null}]}, {"text": "CCS CONCEPTS", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.36912155151367, 230.69398498535156, 134.81988525390625, 241.21551513671875], "page": 1, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; Object detection ;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.02470016479492, 194.8704071044922, 297.8529357910156, 226.241455078125], "page": 1, "span": [0, 170], "__ref_s3_data": null}]}, {"text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.33460235595703, 117.82738494873047, 295.11798095703125, 158.33511352539062], "page": 1, "span": [0, 397], "__ref_s3_data": null}]}, {"text": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. 
https://doi.org/10.1145/3534678.3539043", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.31700134277344, 85.73310852050781, 197.8627471923828, 116.91976928710938], "page": 1, "span": [0, 168], "__ref_s3_data": null}]}, {"text": "Figure 1: Four examples of complex page layouts across different document categories", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [317.2291564941406, 232.3291473388672, 559.8057861328125, 252.12974548339844], "page": 1, "span": [0, 84], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"text": "KEYWORDS", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.11431884765625, 189.22499084472656, 379.82049560546875, 199.97215270996094], "page": 1, "span": [0, 8], "__ref_s3_data": null}]}, {"text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.2037658691406, 164.9988250732422, 559.2164306640625, 184.67845153808594], "page": 1, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "ACM Reference Format:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.3434753417969, 144.41390991210938, 404.6536560058594, 152.36439514160156], "page": 1, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. 
https://doi.org/10.1145/ 3534678.3539043", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.1117248535156, 84.62297058105469, 559.5494995117188, 142.41151428222656], "page": 1, "span": [0, 374], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.19501876831055, 722.7692260742188, 558.4357299804688, 732.1524047851562], "page": 2, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "1 INTRODUCTION", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.79800033569336, 695.8309936523438, 156.52899169921875, 706.4523315429688], "page": 2, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.80397415161133, 562.986572265625, 303.1766357421875, 681.3472290039062], "page": 2, "span": [0, 702], "__ref_s3_data": null}]}, {"text": "A key problem in the process of document conversion is to understand the structure of a single document page, i.e. which segments of text should be grouped together in a unit. 
To train models for this task, there are currently two large datasets available to the community, PubLayNet [6] and DocBank [7]. They were introduced in 2019 and 2020 respectively and significantly accelerated the implementation of layout detection and segmentation models due to their sizes of 300K and 500K ground-truth pages. These sizes were achieved by leveraging an automation approach. The benefit of automated ground-truth generation is obvious: one can generate large ground-truth datasets at virtually no cost. However, the automation introduces a constraint on the variability in the dataset, because corresponding structured source data must be available. PubLayNet and DocBank were both generated from scientific document repositories (PubMed and arXiv), which provide XML or L A T E X sources. Those scientific documents present a limited variability in their layouts, because they are typeset in uniform templates provided by the publishers. Obviously, documents such as technical manuals, annual company reports, legal text, government tenders, etc. have very different and partially unique layouts. As a consequence, the layout predictions obtained from models trained on PubLayNet or DocBank is very reasonable when applied on scientific documents. However, for more artistic or free-style layouts, we see sub-par prediction quality from these models, which we demonstrate in Section 5.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.89326477050781, 289.0808410644531, 295.5641174316406, 561.2902221679688], "page": 2, "span": [0, 1580], "__ref_s3_data": null}]}, {"text": "In this paper, we present the DocLayNet dataset. It provides pageby-page layout annotation ground-truth using bounding-boxes for 11 distinct class labels on 80863 unique document pages, of which a fraction carry double- or triple-annotations. 
DocLayNet is similar in spirit to PubLayNet and DocBank and will likewise be made available to the public 1 in order to stimulate the document-layout analysis community. It distinguishes itself in the following aspects:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.12458419799805, 212.36782836914062, 295.56396484375, 287.0208740234375], "page": 2, "span": [0, 462], "__ref_s3_data": null}]}, {"text": "(1) Human Annotation : In contrast to PubLayNet and DocBank, we relied on human annotation instead of automation approaches to generate the data set.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.64593505859375, 176.96405029296875, 295.5616455078125, 208.28524780273438], "page": 2, "span": [0, 149], "__ref_s3_data": null}]}, {"text": "(2) Large Layout Variability : We include diverse and complex layouts from a large variety of public sources.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.50244140625, 154.92233276367188, 294.3029479980469, 174.95782470703125], "page": 2, "span": [0, 109], "__ref_s3_data": null}]}, {"text": "(3) Detailed Label Set : We define 11 class labels to distinguish layout features in high detail. 
PubLayNet provides 5 labels; DocBank provides 13, although not a superset of ours.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.18266296386719, 121.99307250976562, 294.6838073730469, 153.57122802734375], "page": 2, "span": [0, 180], "__ref_s3_data": null}]}, {"text": "(4) Redundant Annotations : A fraction of the pages in the DocLayNet data set carry more than one human annotation.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.30329132080078, 99.92230987548828, 295.56439208984375, 120.3491439819336], "page": 2, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "$^{1}$https://developer.ibm.com/exchanges/data/all/doclaynet", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [53.60314178466797, 82.76702880859375, 216.05824279785156, 90.63584899902344], "page": 2, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "This enables experimentation with annotation uncertainty and quality control analysis.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [341.2403564453125, 685.3028564453125, 558.5009765625, 705.5034790039062], "page": 2, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "(5) Pre-defined Train-, Test- & Validation-set : Like DocBank, we provide fixed train-, test- & validation-sets to ensure proportional representation of the class-labels. Further, we prevent leakage of unique layouts across sets, which has a large effect on model accuracy scores.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [328.06146240234375, 630.4351806640625, 559.7210083007812, 683.4995727539062], "page": 2, "span": [0, 280], "__ref_s3_data": null}]}, {"text": "All aspects outlined above are detailed in Section 3. In Section 4, we will elaborate on how we designed and executed this large-scale human annotation campaign. 
We will also share key insights and lessons learned that might prove helpful for other parties planning to set up annotation campaigns.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0706787109375, 571.292724609375, 559.1903076171875, 624.9239501953125], "page": 2, "span": [0, 297], "__ref_s3_data": null}]}, {"text": "In Section 5, we will present baseline accuracy numbers for a variety of object detection methods (Faster R-CNN, Mask R-CNN and YOLOv5) trained on DocLayNet. We further show how the model performance is impacted by varying the DocLayNet dataset size, reducing the label set and modifying the train/test-split. Last but not least, we compare the performance of models trained on PubLayNet, DocBank and DocLayNet and demonstrate that a model trained on DocLayNet provides overall more robust layout recovery.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.9918518066406, 483.6390686035156, 559.5819702148438, 569.6455078125], "page": 2, "span": [0, 506], "__ref_s3_data": null}]}, {"text": "2 RELATED WORK", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.33935546875, 460.4820251464844, 422.0046081542969, 471.2471923828125], "page": 2, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "While early approaches in document-layout analysis used rulebased algorithms and heuristics [8], the problem is lately addressed with deep learning methods. The most common approach is to leverage object detection models [9-15]. In the last decade, the accuracy and speed of these models has increased dramatically. Furthermore, most state-of-the-art object detection methods can be trained and applied with very little work, thanks to a standardisation effort of the ground-truth data format [16] and common deep-learning frameworks [17]. 
Reference data sets such as PubLayNet [6] and DocBank provide their data in the commonly accepted COCO format [16].", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.9687805175781, 327.7038269042969, 559.7161254882812, 446.38397216796875], "page": 2, "span": [0, 655], "__ref_s3_data": null}]}, {"text": "Lately, new types of ML models for document-layout analysis have emerged in the community [18-21]. These models do not approach the problem of layout analysis purely based on an image representation of the page, as computer vision methods do. Instead, they combine the text tokens and image representation of a page in order to obtain a segmentation. While the reported accuracies appear to be promising, a broadly accepted data format which links geometric and textual features has yet to establish.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.156982421875, 239.59246826171875, 559.1864624023438, 325.6906433105469], "page": 2, "span": [0, 500], "__ref_s3_data": null}]}, {"text": "3 THE DOCLAYNET DATASET", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.58740234375, 216.37100219726562, 477.8531799316406, 226.6800994873047], "page": 2, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape of labeled, rectangular boundingboxes. We define 11 distinct labels for layout features, namely Caption , Footnote , Formula , List-item , Page-footer , Page-header , Picture , Section-header , Table , Text , and Title . 
Our reasoning for picking this particular label set is detailed in Section 4.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.11236572265625, 116.19312286376953, 559.7131958007812, 202.27523803710938], "page": 2, "span": [0, 522], "__ref_s3_data": null}]}, {"text": "In addition to open intellectual property constraints for the source documents, we required that the documents in DocLayNet adhere to a few conditions. Firstly, we kept scanned documents", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.34619140625, 83.59282684326172, 558.5303344726562, 114.41421508789062], "page": 2, "span": [0, 186], "__ref_s3_data": null}]}, {"text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.4626579284668, 722.95458984375, 347.0511779785156, 732.11474609375], "page": 3, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [365.31488037109375, 723.0569458007812, 558.807861328125, 731.9796142578125], "page": 3, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "Figure 2: Distribution of DocLayNet pages across document categories.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [53.28777313232422, 536.294677734375, 294.0437316894531, 556.148193359375], "page": 3, "span": [0, 69], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"text": "to a minimum, since they introduce difficulties in annotation (see Section 4). As a second condition, we focussed on medium to large documents ( > 10 pages) with technical content, dense in complex tables, figures, plots and captions. Such documents carry a lot of information value, but are often hard to analyse with high accuracy due to their challenging layouts. 
Counterexamples of documents not included in the dataset are receipts, invoices, hand-written documents or photographs showing \"text in the wild\".", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.244232177734375, 424.931396484375, 294.5379943847656, 510.7526550292969], "page": 3, "span": [0, 513], "__ref_s3_data": null}]}, {"text": "The pages in DocLayNet can be grouped into six distinct categories, namely Financial Reports , Manuals , Scientific Articles , Laws & Regulations , Patents and Government Tenders . Each document category was sourced from various repositories. For example, Financial Reports contain both free-style format annual reports 2 which expose company-specific, artistic layouts as well as the more formal SEC filings. The two largest categories ( Financial Reports and Manuals ) contain a large amount of free-style layouts in order to obtain maximum variability. In the other four categories, we boosted the variability by mixing documents from independent providers, such as different government websites or publishers. In Figure 2, we show the document categories contained in DocLayNet with their respective sizes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.10974884033203, 282.6438293457031, 295.5604553222656, 423.1407775878906], "page": 3, "span": [0, 810], "__ref_s3_data": null}]}, {"text": "We did not control the document selection with regard to language. The vast majority of documents contained in DocLayNet (close to 95%) are published in English language. However, DocLayNet also contains a number of documents in other languages such as German (2.5%), French (1.0%) and Japanese (1.0%). 
While the document language has negligible impact on the performance of computer vision methods such as object detection and segmentation models, it might prove challenging for layout analysis methods which exploit textual features.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.8973388671875, 183.77932739257812, 295.5615539550781, 281.3227233886719], "page": 3, "span": [0, 535], "__ref_s3_data": null}]}, {"text": "To ensure that future benchmarks in the document-layout analysis community can be easily compared, we have split up DocLayNet into pre-defined train-, test- and validation-sets. In this way, we can avoid spurious variations in the evaluation scores due to random splitting in train-, test- and validation-sets. We also ensured that less frequent labels are represented in train and test sets in equal proportions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.209388732910156, 106.8985824584961, 295.56396484375, 182.471923828125], "page": 3, "span": [0, 413], "__ref_s3_data": null}]}, {"text": "$^{2}$e.g. AAPL from https://www.annualreports.com/", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [53.352603912353516, 83.35768127441406, 195.78997802734375, 91.47167205810547], "page": 3, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "Table 1 shows the overall frequency and distribution of the labels among the different sets. Importantly, we ensure that subsets are only split on full-document boundaries. This avoids that pages of the same document are spread over train, test and validation set, which can give an undesired evaluation advantage to models and lead to overestimation of their prediction accuracy. 
We will show the impact of this decision in Section 5.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0691833496094, 630.5088500976562, 559.1918334960938, 705.8527221679688], "page": 3, "span": [0, 435], "__ref_s3_data": null}]}, {"text": "In order to accommodate the different types of models currently in use by the community, we provide DocLayNet in an augmented COCO format [16]. This entails the standard COCO ground-truth file (in JSON format) with the associated page images (in PNG format, 1025 \u00d7 1025 pixels). Furthermore, custom fields have been added to each COCO record to specify document category, original document filename and page number. In addition, we also provide the original PDF pages, as well as sidecar files containing parsed PDF text and text-cell coordinates (in JSON). All additional files are linked to the primary page images by their matching filenames.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.05938720703125, 520.8086547851562, 558.862060546875, 628.44580078125], "page": 3, "span": [0, 645], "__ref_s3_data": null}]}, {"text": "Despite being cost-intense and far less scalable than automation, human annotation has several benefits over automated groundtruth generation. The first and most obvious reason to leverage human annotations is the freedom to annotate any type of document without requiring a programmatic source. For most PDF documents, the original source document is not available. The latter is not a hard constraint with human annotation, but it is for automated methods. A second reason to use human annotations is that the latter usually provide a more natural interpretation of the page layout. The human-interpreted layout can significantly deviate from the programmatic layout used in typesetting. For example, \"invisible\" tables might be used solely for aligning text paragraphs on columns. 
Such typesetting tricks might be interpreted by automated methods incorrectly as an actual table, while the human annotation will interpret it correctly as Text or other styles. The same applies to multi-line text elements, when authors decided to space them as \"invisible\" list elements without bullet symbols. A third reason to gather ground-truth through human annotation is to estimate a \"natural\" upper bound on the segmentation accuracy. As we will show in Section 4, certain documents featuring complex layouts can have different but equally acceptable layout interpretations. This natural upper bound for segmentation accuracy can be found by annotating the same pages multiple times by different people and evaluating the inter-annotator agreement. Such a baseline consistency evaluation is very useful to define expectations for a good target accuracy in trained deep neural network models and avoid overfitting (see Table 1). On the flip side, achieving high annotation consistency proved to be a key challenge in human annotation, as we outline in Section 4.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.88604736328125, 203.11082458496094, 559.7215576171875, 518.6715087890625], "page": 3, "span": [0, 1854], "__ref_s3_data": null}]}, {"text": "4 ANNOTATION CAMPAIGN", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.66510009765625, 174.8409881591797, 470.2132568359375, 185.15008544921875], "page": 3, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "The annotation campaign was carried out in four phases. In phase one, we identified and prepared the data sources for annotation. In phase two, we determined the class labels and how annotations should be done on the documents in order to obtain maximum consistency. The latter was guided by a detailed requirement analysis and exhaustive experiments. In phase three, we trained the annotation staff and performed exams for quality assurance. 
In phase four,", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0245056152344, 85.38961791992188, 559.7138061523438, 160.93588256835938], "page": 3, "span": [0, 457], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.345272064208984, 723.0101318359375, 558.5491943359375, 732.1525268554688], "page": 4, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [52.74671936035156, 676.2418212890625, 558.5100708007812, 707.6976928710938], "page": 4, "span": [0, 348], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [53.28383255004883, 185.58580017089844, 295.64874267578125, 237.99000549316406], "page": 4, "span": [0, 281], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"text": "we distributed the annotation workload and performed continuous quality controls. Phase one and two required a small team of experts only. 
For phases three and four, a group of 40 dedicated annotators were assembled and supervised.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.954681396484375, 116.45683288574219, 294.3648681640625, 158.3203887939453], "page": 4, "span": [0, 231], "__ref_s3_data": null}]}, {"text": "Phase 1: Data selection and preparation. Our inclusion criteria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.368797302246094, 83.57982635498047, 295.5584411621094, 114.14925384521484], "page": 4, "span": [0, 193], "__ref_s3_data": null}]}, {"text": "include publication repositories such as arXiv$^{3}$, government offices, company websites as well as data directory services for financial reports and patents. Scanned documents were excluded wherever possible because they can be rotated or skewed. This would not allow us to perform annotation with rectangular bounding-boxes and therefore complicate the annotation process.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.2582702636719, 416.48919677734375, 559.1853637695312, 481.0997619628906], "page": 4, "span": [0, 376], "__ref_s3_data": null}]}, {"text": "Preparation work included uploading and parsing the sourced PDF documents in the Corpus Conversion Service (CCS) [22], a cloud-native platform which provides a visual annotation interface and allows for dataset inspection and analysis. The annotation interface of CCS is shown in Figure 3. The desired balance of pages between the different document categories was achieved by selective subsampling of pages with certain desired properties. For example, we made sure to include the title page of each document and bias the remaining page selection to those with figures or tables. 
The latter was achieved by leveraging pre-trained object detection models from PubLayNet, which helped us estimate how many figures and tables a given page contains.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0777587890625, 284.9187316894531, 559.7130737304688, 415.02398681640625], "page": 4, "span": [0, 746], "__ref_s3_data": null}]}, {"text": "Phase 2: Label selection and guideline. We reviewed the collected documents and identified the most common structural features they exhibit. This was achieved by identifying recurrent layout elements and lead us to the definition of 11 distinct class labels. These 11 class labels are Caption , Footnote , Formula , List-item , Pagefooter , Page-header , Picture , Section-header , Table , Text , and Title . Critical factors that were considered for the choice of these class labels were (1) the overall occurrence of the label, (2) the specificity of the label, (3) recognisability on a single page (i.e. no need for context from previous or next page) and (4) overall coverage of the page. Specificity ensures that the choice of label is not ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. 
Labels such as Author and Affiliation , as seen in DocBank, are often only distinguishable by discriminating on", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.9024963378906, 98.9438247680664, 559.7176513671875, 283.8972473144531], "page": 4, "span": [0, 1159], "__ref_s3_data": null}]}, {"text": "$^{3}$https://arxiv.org/", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [317.7030029296875, 82.5821304321289, 369.40142822265625, 90.54422760009766], "page": 4, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.456207275390625, 723.0143432617188, 347.07373046875, 732.0245361328125], "page": 5, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [365.2621765136719, 723.0404663085938, 558.9374389648438, 731.9317626953125], "page": 5, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "the textual content of an element, which goes beyond visual layout recognition, in particular outside the Scientific Articles category.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.24338912963867, 684.8170166015625, 294.04541015625, 705.5283813476562], "page": 5, "span": [0, 135], "__ref_s3_data": null}]}, {"text": "At first sight, the task of visual document-layout interpretation appears intuitive enough to obtain plausible annotations in most cases. However, during early trial-runs in the core team, we observed many cases in which annotators use different annotation styles, especially for documents with challenging layouts. For example, if a figure is presented with subfigures, one annotator might draw a single figure bounding-box, while another might annotate each subfigure separately. 
The same applies for lists, where one might annotate all list items in one block or each list item separately. In essence, we observed that challenging layouts would be annotated in different but plausible ways. To illustrate this, we show in Figure 4 multiple examples of plausible but inconsistent annotations on the same pages.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.124725341796875, 542.8159790039062, 295.5592346191406, 683.8748168945312], "page": 5, "span": [0, 812], "__ref_s3_data": null}]}, {"text": "Obviously, this inconsistency in annotations is not desirable for datasets which are intended to be used for model training. To minimise these inconsistencies, we created a detailed annotation guideline. While perfect consistency across 40 annotation staff members is clearly not possible to achieve, we saw a huge improvement in annotation consistency after the introduction of our annotation guideline. A few selected, non-trivial highlights of the guideline are:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.339271545410156, 455.16583251953125, 295.56005859375, 541.1383666992188], "page": 5, "span": [0, 465], "__ref_s3_data": null}]}, {"text": "(1) Every list-item is an individual object instance with class label List-item . This definition is different from PubLayNet and DocBank, where all list-items are grouped together into one List object.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.39098358154297, 402.13092041015625, 294.42474365234375, 444.29510498046875], "page": 5, "span": [0, 202], "__ref_s3_data": null}]}, {"text": "(2) A List-item is a paragraph with hanging indentation. Singleline elements can qualify as List-item if the neighbour elements expose hanging indentation. 
Bullet or enumeration symbols are not a requirement.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.31100463867188, 358.39984130859375, 295.563720703125, 400.2758483886719], "page": 5, "span": [0, 208], "__ref_s3_data": null}]}, {"text": "(3) For every Caption , there must be exactly one corresponding Picture or Table .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.26787567138672, 336.4728698730469, 294.60943603515625, 356.2404479980469], "page": 5, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "(4) Connected sub-pictures are grouped together in one Picture object.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.2632064819336, 314.5648193359375, 294.7487487792969, 334.179443359375], "page": 5, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "(5) Formula numbers are included in a Formula object.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [63.9930305480957, 303.59686279296875, 264.5057067871094, 312.8252868652344], "page": 5, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "(6) Emphasised text (e.g. in italic or bold) at the beginning of a paragraph is not considered a Section-header , unless it appears exclusively on its own line.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [64.07823181152344, 270.048095703125, 295.0240783691406, 301.5160827636719], "page": 5, "span": [0, 160], "__ref_s3_data": null}]}, {"text": "The complete annotation guideline is over 100 pages long and a detailed description is obviously out of scope for this paper. Nevertheless, it will be made publicly available alongside with DocLayNet for future reference.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.994422912597656, 217.798828125, 295.5625305175781, 259.6097106933594], "page": 5, "span": [0, 221], "__ref_s3_data": null}]}, {"text": "Phase 3: Training. 
After a first trial with a small group of people, we realised that providing the annotation guideline and a set of random practice pages did not yield the desired quality level for layout annotation. Therefore we prepared a subset of pages with two different complexity levels, each with a practice and an exam part. 974 pages were reference-annotated by one proficient core team member. Annotation staff were then given the task to annotate the same subsets (blinded from the reference). By comparing the annotations of each staff member with the reference annotations, we could quantify how closely their annotations matched the reference. Only after passing two exam levels with high annotation quality, staff were admitted into the production phase. Practice iterations", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.26631546020508, 86.24749755859375, 295.562255859375, 215.95584106445312], "page": 5, "span": [0, 792], "__ref_s3_data": null}]}, {"text": "Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [316.9992980957031, 287.86785888671875, 559.8057861328125, 318.7776794433594], "page": 5, "span": [0, 173], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"text": "were carried out over a timeframe of 12 weeks, after which 8 of the 40 initially allocated annotators did not pass the bar.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.8349914550781, 247.1688232421875, 558.204345703125, 266.81207275390625], "page": 5, "span": [0, 123], "__ref_s3_data": null}]}, {"text": "Phase 4: Production annotation. The previously selected 80K pages were annotated with the defined 11 class labels by 32 annotators. This production phase took around three months to complete. 
All annotations were created online through CCS, which visualises the programmatic PDF text-cells as an overlay on the page. The page annotation are obtained by drawing rectangular bounding-boxes, as shown in Figure 3. With regard to the annotation practices, we implemented a few constraints and capabilities on the tooling level. First, we only allow non-overlapping, vertically oriented, rectangular boxes. For the large majority of documents, this constraint was sufficient and it speeds up the annotation considerably in comparison with arbitrary segmentation shapes. Second, annotator staff were not able to see each other's annotations. This was enforced by design to avoid any bias in the annotation, which could skew the numbers of the inter-annotator agreement (see Table 1). We wanted", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.00592041015625, 82.7375717163086, 559.7149047851562, 245.28392028808594], "page": 5, "span": [0, 987], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.30706024169922, 722.92333984375, 558.4274291992188, 732.1127319335938], "page": 6, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. 
All models were initialised using pre-trained weights from the COCO 2017 dataset.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.78031539916992, 608.98291015625, 295.64874267578125, 705.8385620117188], "page": 6, "span": [0, 489], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and Picture . For the latter, we instructed annotation staff to minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way to flag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in the final dataset. 
With all these measures in place, experienced annotation staff managed to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.25688552856445, 214.2948760986328, 295.5561218261719, 421.4337158203125], "page": 6, "span": [0, 1252], "__ref_s3_data": null}]}, {"text": "5 EXPERIMENTS", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.62337875366211, 193.5609893798828, 147.4853515625, 203.87008666992188], "page": 6, "span": [0, 13], "__ref_s3_data": null}]}, {"text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.076290130615234, 82.4822006225586, 295.4281005859375, 179.65382385253906], "page": 6, "span": [0, 584], "__ref_s3_data": null}]}, {"text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. 
The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [317.10931396484375, 449.6510009765625, 559.8057861328125, 513.7953491210938], "page": 6, "span": [0, 329], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.2011413574219, 388.6548156738281, 558.2041625976562, 408.8042297363281], "page": 6, "span": [0, 102], "__ref_s3_data": null}]}, {"text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0830078125, 311.45587158203125, 558.4364013671875, 386.632568359375], "page": 6, "span": [0, 397], "__ref_s3_data": null}]}, {"text": "Baselines for Object Detection", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.1941223144531, 284.5037841796875, 466.8532409667969, 295.42913818359375], "page": 6, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of 1025 \u00d7 1025 pixels. For training, we only used one annotation in case of redundantly annotated pages. 
As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as Text , Table and Picture . This is not entirely surprising, as Text , Table and Picture are abundant and the most visually distinctive in a document.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0144348144531, 85.2998275756836, 558.7822875976562, 280.8944396972656], "page": 6, "span": [0, 1146], "__ref_s3_data": null}]}, {"text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.35094451904297, 722.9555053710938, 347.0172424316406, 732.038818359375], "page": 7, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [365.1936950683594, 723.0802001953125, 558.7797241210938, 731.8773803710938], "page": 7, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. 
The reduced label sets were obtained by either down-mapping or dropping labels.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [52.8690299987793, 663.3739624023438, 295.6486511230469, 705.8510131835938], "page": 7, "span": [0, 205], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"text": "Learning Curve", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.446834564208984, 461.592041015625, 131.05624389648438, 472.6955871582031], "page": 7, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "One of the fundamental questions related to any dataset is if it is \"large enough\". To answer this question for DocLayNet, we performed a data ablation study in which we evaluated a Mask R-CNN model trained on increasing fractions of the DocLayNet dataset. As can be seen in Figure 5, the mAP score rises sharply in the beginning and eventually levels out. To estimate the error-bar on the metrics, we ran the training five times on the entire data-set. This resulted in a 1% error-bar, depicted by the shaded area in Figure 5. In the inset of Figure 5, we show the exact same data-points, but with a logarithmic scale on the x-axis. As is expected, the mAP score increases linearly as a function of the data-size in the inset. The curve ultimately flattens out between the 80% and 100% mark, with the 80% mark falling within the error-bars of the 100% mark. This provides a good indication that the model would not improve significantly by yet increasing the data size. 
Rather, it would probably benefit more from improved data consistency (as discussed in Section 3), data augmentation methods [23], or the addition of more document categories and styles.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.78499984741211, 262.38037109375, 295.558349609375, 457.72955322265625], "page": 7, "span": [0, 1157], "__ref_s3_data": null}]}, {"text": "Impact of Class Labels", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.37664794921875, 239.1809844970703, 164.3289794921875, 250.044677734375], "page": 7, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "The choice and number of labels can have a significant effect on the overall model performance. Since PubLayNet, DocBank and DocLayNet all have different label sets, it is of particular interest to understand and quantify this influence of the label set on the model performance. We investigate this by either down-mapping labels into more common ones (e.g. Caption \u2192 Text ) or excluding them from the annotations entirely. Furthermore, it must be stressed that all mappings and exclusions were performed on the data before model training. In Table 3, we present the mAP scores for a Mask R-CNN R50 network on different label sets. Where a label is down-mapped, we show its corresponding label, otherwise it was excluded. We present three different label sets, with 6, 5 and 4 different labels respectively. The set of 5 labels contains the same labels as PubLayNet. However, due to the different definition of", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.06760787963867, 83.39567565917969, 295.5567932128906, 235.12689208984375], "page": 7, "span": [0, 910], "__ref_s3_data": null}]}, {"text": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. 
Naive page-wise split will result in GLYPH 10% point improvement.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [316.9989929199219, 663.7767944335938, 559.8068237304688, 705.6134643554688], "page": 7, "span": [0, 189], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"text": "lists in PubLayNet (grouped list-items) versus DocLayNet (separate list-items), the label set of size 4 is the closest to PubLayNet, in the assumption that the List is down-mapped to Text in PubLayNet. The results in Table 3 show that the prediction accuracy on the remaining class labels does not change significantly when other classes are merged into them. The overall macro-average improves by around 5%, in particular when Page-footer and Page-header are excluded.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.03326416015625, 375.50982666015625, 559.5849609375, 460.6855163574219], "page": 7, "span": [0, 469], "__ref_s3_data": null}]}, {"text": "Impact of Document Split in Train and Test Set", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.4661865234375, 351.4896545410156, 549.860595703125, 362.8900451660156], "page": 7, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "Many documents in DocLayNet have a unique styling. In order to avoid overfitting on a particular style, we have split the train-, test- and validation-sets of DocLayNet on document boundaries, i.e. every document contributes pages to only one set. To the best of our knowledge, this was not considered in PubLayNet or DocBank. To quantify how this affects model performance, we trained and evaluated a Mask R-CNN R50 model on a modified dataset version. Here, the train-, test- and validation-sets were obtained by a randomised draw over the individual pages. 
As can be seen in Table 4, the difference in model performance is surprisingly large: pagewise splitting gains \u02dc 10% in mAP over the document-wise splitting. Thus, random page-wise splitting of DocLayNet can easily lead to accidental overestimation of model performance and should be avoided.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.9546813964844, 196.5628204345703, 559.7138061523438, 348.10198974609375], "page": 7, "span": [0, 852], "__ref_s3_data": null}]}, {"text": "Dataset Comparison", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.3337707519531, 173.20875549316406, 418.5477600097656, 183.94322204589844], "page": 7, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "Throughout this paper, we claim that DocLayNet's wider variety of document layouts leads to more robust layout detection models. In Table 5, we provide evidence for that. We trained models on each of the available datasets (PubLayNet, DocBank and DocLayNet) and evaluated them on the test sets of the other datasets. Due to the different label sets and annotation styles, a direct comparison is not possible. Hence, we focussed on the common labels among the datasets. Between PubLayNet and DocLayNet, these are Picture ,", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [316.7283935546875, 83.24566650390625, 559.1881713867188, 168.86700439453125], "page": 7, "span": [0, 521], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.288330078125, 722.9171142578125, 558.4634399414062, 732.134033203125], "page": 8, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. 
By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [52.89757537841797, 641.85888671875, 295.648681640625, 705.7824096679688], "page": 8, "span": [0, 298], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"text": "Section-header , Table and Text . Before training, we either mapped or excluded DocLayNet's other labels as specified in table 3, and also PubLayNet's List to Text . Note that the different clustering of lists (by list-element vs. whole list objects) naturally decreases the mAP score for Text .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.279537200927734, 348.85986328125, 294.6396789550781, 401.5162658691406], "page": 8, "span": [0, 295], "__ref_s3_data": null}]}, {"text": "For comparison of DocBank with DocLayNet, we trained only on Picture and Table clusters of each dataset. We had to exclude Text because successive paragraphs are often grouped together into a single object in DocBank. This paragraph grouping is incompatible with the individual paragraphs of DocLayNet. As can be seen in Table 5, DocLayNet trained models yield better performance compared to the previous datasets. It is noteworthy that the models trained on PubLayNet and DocBank perform very well on their own test set, but have a much lower performance on the foreign datasets. While this also applies to DocLayNet, the difference is far less pronounced. 
Thus we conclude that DocLayNet trained models are overall more robust and will produce better results for challenging, unseen layouts.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.04817581176758, 205.98951721191406, 295.55908203125, 346.9607849121094], "page": 8, "span": [0, 793], "__ref_s3_data": null}]}, {"text": "Example Predictions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [53.05388259887695, 176.33340454101562, 156.02235412597656, 187.29098510742188], "page": 8, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "To conclude this section, we illustrate the quality of layout predictions one can expect from DocLayNet-trained models by providing a selection of examples without any further post-processing applied. Figure 6 shows selected layout predictions on pages from the test-set of DocLayNet. Results look decent in general across document categories, however one can also observe mistakes such as overlapping clusters of different classes, or entirely missing boxes due to low confidence.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [53.07720184326172, 86.64982604980469, 295.5584411621094, 172.26492309570312], "page": 8, "span": [0, 481], "__ref_s3_data": null}]}, {"text": "6 CONCLUSION", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.4961853027344, 695.8309936523438, 405.7296142578125, 706.4700317382812], "page": 8, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "In this paper, we presented the DocLayNet dataset. It provides the document conversion and layout analysis research community a new and challenging dataset to improve and fine-tune novel ML methods on. In contrast to many other datasets, DocLayNet was created by human annotation in order to obtain reliable layout ground-truth on a wide variety of publication- and typesettingstyles. 
Including a large proportion of documents outside the scientific publishing domain adds significant value in this respect.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.0487976074219, 605.4117431640625, 559.7137451171875, 691.6207275390625], "page": 8, "span": [0, 507], "__ref_s3_data": null}]}, {"text": "From the dataset, we have derived on the one hand reference metrics for human performance on document-layout annotation (through double and triple annotations) and on the other hand evaluated the baseline performance of commonly used object detection methods. We also illustrated the impact of various dataset-related aspects on model performance through data-ablation experiments, both from a size and class-label perspective. Last but not least, we compared the accuracy of models trained on other public datasets and showed that DocLayNet trained models are more robust.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.03955078125, 506.7440185546875, 559.717041015625, 603.672607421875], "page": 8, "span": [0, 573], "__ref_s3_data": null}]}, {"text": "To date, there is still a significant gap between human and ML accuracy on the layout interpretation task, and we hope that this work will inspire the research community to close that gap.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [317.1865234375, 474.2935791015625, 558.6325073242188, 505.4895324707031], "page": 8, "span": [0, 188], "__ref_s3_data": null}]}, {"text": "REFERENCES", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [317.4455871582031, 446.5990295410156, 387.5806579589844, 457.4013366699219], "page": 8, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "[1] Max G\u00f6bel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. 
In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.5848693847656, 420.8371276855469, 559.0187377929688, 444.4063415527344], "page": 8, "span": [0, 191], "__ref_s3_data": null}]}, {"text": "[2] Christian Clausner, Apostolos Antonacopoulos, and Stefan Pletschacher. Icdar2017 competition on recognition of documents with complex layouts rdcl2017. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 1404-1410, 2017.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.76806640625, 388.9571228027344, 559.7276000976562, 420.2254333496094], "page": 8, "span": [0, 279], "__ref_s3_data": null}]}, {"text": "[3] Herv\u00e9 D\u00e9jean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), April 2019. http://sac.founderit.com/.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.58111572265625, 364.88128662109375, 558.4269409179688, 388.028076171875], "page": 8, "span": [0, 213], "__ref_s3_data": null}]}, {"text": "[4] Antonio Jimeno Yepes, Peter Zhong, and Douglas Burdick. Competition on scientific literature parsing. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 605-617. LNCS 12824, SpringerVerlag, sep 2021.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.72210693359375, 333.173095703125, 559.3787231445312, 364.17962646484375], "page": 8, "span": [0, 251], "__ref_s3_data": null}]}, {"text": "[5] Logan Markewich, Hao Zhang, Yubin Xing, Navid Lambert-Shirzad, Jiang Zhexin, Roy Lee, Zhi Li, and Seok-Bum Ko. Segmentation for document layout analysis: not dead yet. 
International Journal on Document Analysis and Recognition (IJDAR) , pages 1-11, 01 2022.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.47723388671875, 300.9960021972656, 559.2555541992188, 332.2057800292969], "page": 8, "span": [0, 261], "__ref_s3_data": null}]}, {"text": "[6] Xu Zhong, Jianbin Tang, and Antonio Jimeno-Yepes. Publaynet: Largest dataset ever for document layout analysis. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 1015-1022, sep 2019.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.7210998535156, 277.3751220703125, 558.6044921875, 300.1542053222656], "page": 8, "span": [0, 235], "__ref_s3_data": null}]}, {"text": "[7] Minghao Li, Yiheng Xu, Lei Cui, Shaohan Huang, Furu Wei, Zhoujun Li, and Ming Zhou. Docbank: A benchmark dataset for document layout analysis. In Proceedings of the 28th International Conference on Computational Linguistics , COLING, pages 949-960. International Committee on Computational Linguistics, dec 2020.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.7048034667969, 237.53111267089844, 559.0962524414062, 276.57550048828125], "page": 8, "span": [0, 316], "__ref_s3_data": null}]}, {"text": "[8] Riaz Ahmad, Muhammad Tanvir Afzal, and M. Qadir. Information extraction from pdf sources based on rule-based system using integrated formats. In SemWebEval@ESWC , 2016.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.6175537109375, 213.6141357421875, 558.9022216796875, 236.84490966796875], "page": 8, "span": [0, 172], "__ref_s3_data": null}]}, {"text": "[9] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE Conference on Computer Vision and Pattern Recognition , CVPR, pages 580-587. 
IEEE Computer Society, jun 2014.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [320.695556640625, 181.74110412597656, 559.2744750976562, 212.77767944335938], "page": 8, "span": [0, 271], "__ref_s3_data": null}]}, {"text": "[10] Ross B. Girshick. Fast R-CNN. In 2015 IEEE International Conference on Computer Vision , ICCV, pages 1440-1448. IEEE Computer Society, dec 2015.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.74908447265625, 165.5072479248047, 558.8585205078125, 181.0753173828125], "page": 8, "span": [0, 149], "__ref_s3_data": null}]}, {"text": "[11] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence , 39(6):1137-1149, 2017.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.71527099609375, 141.8831329345703, 558.4170532226562, 164.63047790527344], "page": 8, "span": [0, 227], "__ref_s3_data": null}]}, {"text": "[12] Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross B. Girshick. Mask R-CNN. In IEEE International Conference on Computer Vision , ICCV, pages 2980-2988. 
IEEE Computer Society, Oct 2017.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.5010986328125, 117.60646057128906, 559.278076171875, 141.50643920898438], "page": 8, "span": [0, 192], "__ref_s3_data": null}]}, {"text": "[13] Glenn Jocher, Alex Stoken, Ayush Chaurasia, Jirka Borovec, NanoCode012, TaoXie, Yonghye Kwon, Kalen Michael, Liu Changyu, Jiacong Fang, Abhiram V, Laughing, tkianai, yxNONG, Piotr Skalski, Adam Hogan, Jebastin Nadar, imyhxy, Lorenzo Mammana, Alex Wang, Cristi Fati, Diego Montes, Jan Hajek, Laurentiu", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.4837341308594, 86.09910583496094, 559.0487670898438, 116.94155883789062], "page": 8, "span": [0, 305], "__ref_s3_data": null}]}, {"text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [53.55940246582031, 722.9329223632812, 347.0838623046875, 731.9924926757812], "page": 9, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [365.1275329589844, 723.0497436523438, 558.905029296875, 731.96435546875], "page": 9, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. 
(F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [53.39582824707031, 285.65704345703125, 559.807861328125, 328.056396484375], "page": 9, "span": [0, 386], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"text": "Diaconu, Mai Thanh Minh, Marc, albinxavi, fatih, oleg, and wanghao yang. ultralytics/yolov5: v6.0 - yolov5n nano models, roboflow integration, tensorflow export, opencv dnn support, October 2021.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [68.69137573242188, 242.22409057617188, 295.22406005859375, 265.4314270019531], "page": 9, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "[14] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to-end object detection with transformers. CoRR , abs/2005.12872, 2020.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.56020736694336, 218.56314086914062, 295.12176513671875, 241.63282775878906], "page": 9, "span": [0, 190], "__ref_s3_data": null}]}, {"text": "[15] Mingxing Tan, Ruoming Pang, and Quoc V. Le. Efficientdet: Scalable and efficient object detection. CoRR , abs/1911.09070, 2019.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.61275863647461, 202.62213134765625, 294.3653869628906, 217.57615661621094], "page": 9, "span": [0, 132], "__ref_s3_data": null}]}, {"text": "[16] Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 
Microsoft COCO: common objects in context, 2014.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.668941497802734, 178.71910095214844, 295.2226257324219, 201.57443237304688], "page": 9, "span": [0, 219], "__ref_s3_data": null}]}, {"text": "[17] Yuxin Wu, Alexander Kirillov, Francisco Massa, Wan-Yen Lo, and Ross Girshick. Detectron2, 2019.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.54263687133789, 162.77911376953125, 295.1200866699219, 178.3345947265625], "page": 9, "span": [0, 100], "__ref_s3_data": null}]}, {"text": "[18] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter W. J. Staar. Robust pdf document conversion using recurrent neural networks. In Proceedings of the 35th Conference on Artificial Intelligence , AAAI, pages 1513715145, feb 2021.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.569610595703125, 122.92810821533203, 294.8847351074219, 162.23497009277344], "page": 9, "span": [0, 339], "__ref_s3_data": null}]}, {"text": "[19] Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. Layoutlm: Pre-training of text and layout for document image understanding. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 1192-1200, New York, USA, 2020. Association for Computing Machinery.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [53.4610595703125, 82.67352294921875, 295.22174072265625, 122.19474029541016], "page": 9, "span": [0, 336], "__ref_s3_data": null}]}, {"text": "[20] Shoubin Li, Xuyan Ma, Shuaiqun Pan, Jun Hu, Lin Shi, and Qing Wang. 
Vtlayout: Fusion of visual and text features for document layout analysis, 2021.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.6278076171875, 249.62921142578125, 559.0263671875, 265.5798645019531], "page": 9, "span": [0, 153], "__ref_s3_data": null}]}, {"text": "[21] Peng Zhang, Can Li, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Fei Wu. Vsr: A unified framework for document layout analysis combining vision, semantics and relations, 2021.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.53033447265625, 226.54010009765625, 559.0158081054688, 249.28826904296875], "page": 9, "span": [0, 188], "__ref_s3_data": null}]}, {"text": "[22] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 774-782. ACM, 2018.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.6616516113281, 194.28546142578125, 559.275390625, 225.54457092285156], "page": 9, "span": [0, 290], "__ref_s3_data": null}]}, {"text": "[23] Connor Shorten and Taghi M. Khoshgoftaar. A survey on image data augmentation for deep learning. 
Journal of Big Data , 6(1):60, 2019.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [317.65606689453125, 178.71212768554688, 559.3782958984375, 193.30506896972656], "page": 9, "span": [0, 138], "__ref_s3_data": null}]}], "figures": [{"bounding-box": null, "prov": [{"bbox": [324.3027038574219, 266.1221618652344, 554.91796875, 543.5838623046875], "page": 1, "span": [0, 84], "__ref_s3_data": null}], "text": "Figure 1: Four examples of complex page layouts across different document categories", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [88.16680145263672, 569.726806640625, 264.2818298339844, 698.8894653320312], "page": 3, "span": [0, 69], "__ref_s3_data": null}], "text": "Figure 2: Distribution of DocLayNet pages across document categories.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [53.179771423339844, 250.80191040039062, 295.3565368652344, 481.6382141113281], "page": 4, "span": [0, 281], "__ref_s3_data": null}], "text": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [315.8857116699219, 331.43994140625, 559.6527709960938, 707.0224609375], "page": 5, "span": [0, 173], "__ref_s3_data": null}], "text": "Figure 4: Examples of plausible annotation alternatives for the same page. 
Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [322.7086486816406, 531.372314453125, 553.7246704101562, 701.6975708007812], "page": 6, "span": [0, 329], "__ref_s3_data": null}], "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [53.59891891479492, 343.73516845703125, 554.9424438476562, 708.443115234375], "page": 9, "span": [0, 386], "__ref_s3_data": null}], "text": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes.", "type": "figure"}], "tables": [{"bounding-box": null, "prov": [{"bbox": [98.96420288085938, 498.30108642578125, 512.7739868164062, 654.1231689453125], "page": 4, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. 
The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.", "type": "table", "#-cols": 12, "#-rows": 14, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 4, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 6, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": 
false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 7, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 8, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 9, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 10, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 11, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [104.82499694824219, 632.4428100585938, 141.7127685546875, 640.8174438476562], "spans": [[1, 0]], "text": "class label", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [175.94700622558594, 
632.4428100585938, 198.7126922607422, 640.8174438476562], "spans": [[1, 1]], "text": "Count", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [213.7949981689453, 632.4428100585938, 233.69143676757812, 640.8174438476562], "spans": [[1, 2]], "text": "Train", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [249.37367248535156, 632.4428100585938, 264.5, 640.8174438476562], "spans": [[1, 3]], "text": "Test", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [283.5356750488281, 632.4428100585938, 295.3085632324219, 640.8174438476562], "spans": [[1, 4]], "text": "Val", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [314.0150146484375, 632.4428100585938, 324.9809265136719, 640.8174438476562], "spans": [[1, 5]], "text": "All", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [343.0123596191406, 632.4428100585938, 354.6507568359375, 640.8174438476562], "spans": [[1, 6]], "text": "Fin", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [367.84033203125, 632.4428100585938, 384.3205871582031, 640.8174438476562], "spans": [[1, 7]], "text": "Man", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [407.5435791015625, 632.4428100585938, 418.1597900390625, 640.8174438476562], "spans": [[1, 8]], "text": "Sci", "type": "col_header", "col": 8, "col-header": false, "col-span": [8, 9], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [432.2998046875, 632.4428100585938, 447.8296203613281, 
640.8174438476562], "spans": [[1, 9]], "text": "Law", "type": "col_header", "col": 9, "col-header": false, "col-span": [9, 10], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [465.7265625, 632.4428100585938, 477.5084228515625, 640.8174438476562], "spans": [[1, 10]], "text": "Pat", "type": "col_header", "col": 10, "col-header": false, "col-span": [10, 11], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [493.52239990234375, 632.4428100585938, 507.17822265625, 640.8174438476562], "spans": [[1, 11]], "text": "Ten", "type": "col_header", "col": 11, "col-header": false, "col-span": [11, 12], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [104.82499694824219, 621.0858154296875, 134.01063537597656, 629.46044921875], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [177.86599731445312, 621.0858154296875, 198.71287536621094, 629.46044921875], "spans": [[2, 1]], "text": "22524", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [219.21099853515625, 621.0858154296875, 233.69174194335938, 629.46044921875], "spans": [[2, 2]], "text": "2.04", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [250.01956176757812, 621.0858154296875, 264.50030517578125, 629.46044921875], "spans": [[2, 3]], "text": "1.77", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [280.828125, 621.0858154296875, 295.3088684082031, 629.46044921875], "spans": [[2, 4]], "text": "2.32", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [305.27301025390625, 621.0858154296875, 324.9811706542969, 629.46044921875], "spans": [[2, 5]], "text": 
"84-89", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [334.9428405761719, 621.0858154296875, 354.6510009765625, 629.46044921875], "spans": [[2, 6]], "text": "40-61", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [364.6126708984375, 621.0858154296875, 384.3208312988281, 629.46044921875], "spans": [[2, 7]], "text": "86-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [398.4518737792969, 621.0858154296875, 418.1600341796875, 629.46044921875], "spans": [[2, 8]], "text": "94-99", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [428.1217041015625, 621.0858154296875, 447.8298645019531, 629.46044921875], "spans": [[2, 9]], "text": "95-99", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [457.8005065917969, 621.0858154296875, 477.5086669921875, 629.46044921875], "spans": [[2, 10]], "text": "69-78", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [495.32489013671875, 621.0858154296875, 507.178466796875, 629.46044921875], "spans": [[2, 11]], "text": "n/a", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [104.82499694824219, 610.1268310546875, 137.3282012939453, 618.50146484375], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [182.03500366210938, 610.1268310546875, 198.71250915527344, 618.50146484375], "spans": [[3, 1]], "text": "6318", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [219.21099853515625, 610.1268310546875, 233.69174194335938, 618.50146484375], "spans": [[3, 2]], "text": "0.60", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [250.01956176757812, 610.1268310546875, 264.50030517578125, 618.50146484375], "spans": [[3, 3]], "text": "0.31", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [280.828125, 610.1268310546875, 295.3088684082031, 618.50146484375], "spans": [[3, 4]], "text": "0.58", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [305.27301025390625, 610.1268310546875, 324.9811706542969, 618.50146484375], "spans": [[3, 5]], "text": "83-91", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [342.7973937988281, 610.1268310546875, 354.6509704589844, 618.50146484375], "spans": [[3, 6]], "text": "n/a", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [371.8126525878906, 610.1268310546875, 384.3207702636719, 618.50146484375], "spans": [[3, 7]], "text": "100", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [398.4518127441406, 610.1268310546875, 418.15997314453125, 618.50146484375], "spans": [[3, 8]], "text": "62-88", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [428.12164306640625, 610.1268310546875, 447.8298034667969, 618.50146484375], "spans": [[3, 9]], "text": "85-94", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[465.6549987792969, 610.1268310546875, 477.5085754394531, 618.50146484375], "spans": [[3, 10]], "text": "n/a", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [487.4702453613281, 610.1268310546875, 507.17840576171875, 618.50146484375], "spans": [[3, 11]], "text": "82-97", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [104.82499694824219, 599.1678466796875, 135.33766174316406, 607.54248046875], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [177.86599731445312, 599.1678466796875, 198.71287536621094, 607.54248046875], "spans": [[4, 1]], "text": "25027", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [219.21099853515625, 599.1678466796875, 233.69174194335938, 607.54248046875], "spans": [[4, 2]], "text": "2.25", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [250.01956176757812, 599.1678466796875, 264.50030517578125, 607.54248046875], "spans": [[4, 3]], "text": "1.90", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [280.828125, 599.1678466796875, 295.3088684082031, 607.54248046875], "spans": [[4, 4]], "text": "2.96", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [305.27301025390625, 599.1678466796875, 324.9811706542969, 607.54248046875], "spans": [[4, 5]], "text": "83-85", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [342.7973937988281, 599.1678466796875, 354.6509704589844, 
607.54248046875], "spans": [[4, 6]], "text": "n/a", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [372.4671936035156, 599.1678466796875, 384.3207702636719, 607.54248046875], "spans": [[4, 7]], "text": "n/a", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [398.4518127441406, 599.1678466796875, 418.15997314453125, 607.54248046875], "spans": [[4, 8]], "text": "84-87", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [428.12164306640625, 599.1678466796875, 447.8298034667969, 607.54248046875], "spans": [[4, 9]], "text": "86-96", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [465.6549987792969, 599.1678466796875, 477.5085754394531, 607.54248046875], "spans": [[4, 10]], "text": "n/a", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [495.3247985839844, 599.1678466796875, 507.1783752441406, 607.54248046875], "spans": [[4, 11]], "text": "n/a", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [104.82499694824219, 588.2088012695312, 137.7047882080078, 596.5834350585938], "spans": [[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [173.69700622558594, 588.2088012695312, 198.7132568359375, 596.5834350585938], "spans": [[5, 1]], "text": "185660", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [215.04200744628906, 588.2088012695312, 233.69212341308594, 596.5834350585938], "spans": [[5, 2]], "text": "17.19", 
"type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [245.85055541992188, 588.2088012695312, 264.50067138671875, 596.5834350585938], "spans": [[5, 3]], "text": "13.34", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [276.65911865234375, 588.2088012695312, 295.3092346191406, 596.5834350585938], "spans": [[5, 4]], "text": "15.82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [305.27301025390625, 588.2088012695312, 324.9811706542969, 596.5834350585938], "spans": [[5, 5]], "text": "87-88", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [334.9428405761719, 588.2088012695312, 354.6510009765625, 596.5834350585938], "spans": [[5, 6]], "text": "74-83", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [364.6126708984375, 588.2088012695312, 384.3208312988281, 596.5834350585938], "spans": [[5, 7]], "text": "90-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [398.4518737792969, 588.2088012695312, 418.1600341796875, 596.5834350585938], "spans": [[5, 8]], "text": "97-97", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [428.1217041015625, 588.2088012695312, 447.8298645019531, 596.5834350585938], "spans": [[5, 9]], "text": "81-85", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [457.8005065917969, 588.2088012695312, 477.5086669921875, 596.5834350585938], "spans": [[5, 10]], "text": "75-88", "type": "body", "col": 10, "col-header": false, 
"col-span": [10, 11], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [487.4703369140625, 588.2088012695312, 507.1784973144531, 596.5834350585938], "spans": [[5, 11]], "text": "93-95", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [104.82499694824219, 577.2498168945312, 147.3526153564453, 585.6244506835938], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [177.86599731445312, 577.2498168945312, 198.71287536621094, 585.6244506835938], "spans": [[6, 1]], "text": "70878", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [219.21099853515625, 577.2498168945312, 233.69174194335938, 585.6244506835938], "spans": [[6, 2]], "text": "6.51", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [250.01956176757812, 577.2498168945312, 264.50030517578125, 585.6244506835938], "spans": [[6, 3]], "text": "5.58", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [280.828125, 577.2498168945312, 295.3088684082031, 585.6244506835938], "spans": [[6, 4]], "text": "6.00", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [305.27301025390625, 577.2498168945312, 324.9811706542969, 585.6244506835938], "spans": [[6, 5]], "text": "93-94", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [334.9428405761719, 577.2498168945312, 354.6510009765625, 585.6244506835938], "spans": [[6, 6]], "text": "88-90", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": 
false, "row-span": [6, 7]}, {"bbox": [364.6126708984375, 577.2498168945312, 384.3208312988281, 585.6244506835938], "spans": [[6, 7]], "text": "95-96", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [405.6518859863281, 577.2498168945312, 418.1600036621094, 585.6244506835938], "spans": [[6, 8]], "text": "100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [428.1216735839844, 577.2498168945312, 447.829833984375, 585.6244506835938], "spans": [[6, 9]], "text": "92-97", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [465.00048828125, 577.2498168945312, 477.50860595703125, 585.6244506835938], "spans": [[6, 10]], "text": "100", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [487.47027587890625, 577.2498168945312, 507.1784362792969, 585.6244506835938], "spans": [[6, 11]], "text": "96-98", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [104.82499694824219, 566.2908325195312, 150.10531616210938, 574.6654663085938], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [177.86599731445312, 566.2908325195312, 198.71287536621094, 574.6654663085938], "spans": [[7, 1]], "text": "58022", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [219.21099853515625, 566.2908325195312, 233.69174194335938, 574.6654663085938], "spans": [[7, 2]], "text": "5.10", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": 
[250.01956176757812, 566.2908325195312, 264.50030517578125, 574.6654663085938], "spans": [[7, 3]], "text": "6.70", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [280.828125, 566.2908325195312, 295.3088684082031, 574.6654663085938], "spans": [[7, 4]], "text": "5.06", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [305.27301025390625, 566.2908325195312, 324.9811706542969, 574.6654663085938], "spans": [[7, 5]], "text": "85-89", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [334.9428405761719, 566.2908325195312, 354.6510009765625, 574.6654663085938], "spans": [[7, 6]], "text": "66-76", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [364.6126708984375, 566.2908325195312, 384.3208312988281, 574.6654663085938], "spans": [[7, 7]], "text": "90-94", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [394.2825012207031, 566.2908325195312, 418.1600341796875, 574.6654663085938], "spans": [[7, 8]], "text": "98-100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [428.1217041015625, 566.2908325195312, 447.8298645019531, 574.6654663085938], "spans": [[7, 9]], "text": "91-92", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [457.8005065917969, 566.2908325195312, 477.5086669921875, 574.6654663085938], "spans": [[7, 10]], "text": "97-99", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [487.4703369140625, 566.2908325195312, 507.1784973144531, 
574.6654663085938], "spans": [[7, 11]], "text": "81-86", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [104.82499694824219, 555.3318481445312, 130.80963134765625, 563.7064819335938], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [177.86599731445312, 555.3318481445312, 198.71287536621094, 563.7064819335938], "spans": [[8, 1]], "text": "45976", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [219.21099853515625, 555.3318481445312, 233.69174194335938, 563.7064819335938], "spans": [[8, 2]], "text": "4.21", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [250.01956176757812, 555.3318481445312, 264.50030517578125, 563.7064819335938], "spans": [[8, 3]], "text": "2.78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [280.828125, 555.3318481445312, 295.3088684082031, 563.7064819335938], "spans": [[8, 4]], "text": "5.31", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [305.27301025390625, 555.3318481445312, 324.9811706542969, 563.7064819335938], "spans": [[8, 5]], "text": "69-71", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [334.9428405761719, 555.3318481445312, 354.6510009765625, 563.7064819335938], "spans": [[8, 6]], "text": "56-59", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [364.6126708984375, 555.3318481445312, 384.3208312988281, 563.7064819335938], "spans": [[8, 7]], "text": 
"82-86", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [398.4518737792969, 555.3318481445312, 418.1600341796875, 563.7064819335938], "spans": [[8, 8]], "text": "69-82", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [428.1217041015625, 555.3318481445312, 447.8298645019531, 563.7064819335938], "spans": [[8, 9]], "text": "80-95", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [457.8005065917969, 555.3318481445312, 477.5086669921875, 563.7064819335938], "spans": [[8, 10]], "text": "66-71", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [487.4703369140625, 555.3318481445312, 507.1784973144531, 563.7064819335938], "spans": [[8, 11]], "text": "59-76", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [104.82499694824219, 544.372802734375, 159.5648651123047, 552.7474365234375], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [173.69700622558594, 544.372802734375, 198.7132568359375, 552.7474365234375], "spans": [[9, 1]], "text": "142884", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [215.04200744628906, 544.372802734375, 233.69212341308594, 552.7474365234375], "spans": [[9, 2]], "text": "12.60", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [245.85055541992188, 544.372802734375, 264.50067138671875, 552.7474365234375], "spans": [[9, 3]], "text": "15.77", "type": "body", "col": 
3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [276.65911865234375, 544.372802734375, 295.3092346191406, 552.7474365234375], "spans": [[9, 4]], "text": "12.85", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [305.27301025390625, 544.372802734375, 324.9811706542969, 552.7474365234375], "spans": [[9, 5]], "text": "83-84", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [334.9428405761719, 544.372802734375, 354.6510009765625, 552.7474365234375], "spans": [[9, 6]], "text": "76-81", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [364.6126708984375, 544.372802734375, 384.3208312988281, 552.7474365234375], "spans": [[9, 7]], "text": "90-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [398.4518737792969, 544.372802734375, 418.1600341796875, 552.7474365234375], "spans": [[9, 8]], "text": "94-95", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [428.1217041015625, 544.372802734375, 447.8298645019531, 552.7474365234375], "spans": [[9, 9]], "text": "87-94", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [457.8005065917969, 544.372802734375, 477.5086669921875, 552.7474365234375], "spans": [[9, 10]], "text": "69-73", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [487.4703369140625, 544.372802734375, 507.1784973144531, 552.7474365234375], "spans": [[9, 11]], "text": "78-86", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 
9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [104.82499694824219, 533.413818359375, 124.63176727294922, 541.7884521484375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [177.86599731445312, 533.413818359375, 198.71287536621094, 541.7884521484375], "spans": [[10, 1]], "text": "34733", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [219.21099853515625, 533.413818359375, 233.69174194335938, 541.7884521484375], "spans": [[10, 2]], "text": "3.20", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [250.01956176757812, 533.413818359375, 264.50030517578125, 541.7884521484375], "spans": [[10, 3]], "text": "2.27", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [280.828125, 533.413818359375, 295.3088684082031, 541.7884521484375], "spans": [[10, 4]], "text": "3.60", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [305.27301025390625, 533.413818359375, 324.9811706542969, 541.7884521484375], "spans": [[10, 5]], "text": "77-81", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [334.9428405761719, 533.413818359375, 354.6510009765625, 541.7884521484375], "spans": [[10, 6]], "text": "75-80", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [364.6126708984375, 533.413818359375, 384.3208312988281, 541.7884521484375], "spans": [[10, 7]], "text": "83-86", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 10, "row-header": false, 
"row-span": [10, 11]}, {"bbox": [398.4518737792969, 533.413818359375, 418.1600341796875, 541.7884521484375], "spans": [[10, 8]], "text": "98-99", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [428.1217041015625, 533.413818359375, 447.8298645019531, 541.7884521484375], "spans": [[10, 9]], "text": "58-80", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [457.8005065917969, 533.413818359375, 477.5086669921875, 541.7884521484375], "spans": [[10, 10]], "text": "79-84", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [487.4703369140625, 533.413818359375, 507.1784973144531, 541.7884521484375], "spans": [[10, 11]], "text": "70-85", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [104.82499694824219, 522.455810546875, 120.78518676757812, 530.8304443359375], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [173.69700622558594, 522.455810546875, 198.7132568359375, 530.8304443359375], "spans": [[11, 1]], "text": "510377", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [215.04200744628906, 522.455810546875, 233.69212341308594, 530.8304443359375], "spans": [[11, 2]], "text": "45.82", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [245.85055541992188, 522.455810546875, 264.50067138671875, 530.8304443359375], "spans": [[11, 3]], "text": "49.28", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": 
[11, 12]}, {"bbox": [276.65911865234375, 522.455810546875, 295.3092346191406, 530.8304443359375], "spans": [[11, 4]], "text": "45.00", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [305.27301025390625, 522.455810546875, 324.9811706542969, 530.8304443359375], "spans": [[11, 5]], "text": "84-86", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [334.9428405761719, 522.455810546875, 354.6510009765625, 530.8304443359375], "spans": [[11, 6]], "text": "81-86", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [364.6126708984375, 522.455810546875, 384.3208312988281, 530.8304443359375], "spans": [[11, 7]], "text": "88-93", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [398.4518737792969, 522.455810546875, 418.1600341796875, 530.8304443359375], "spans": [[11, 8]], "text": "89-93", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [428.1217041015625, 522.455810546875, 447.8298645019531, 530.8304443359375], "spans": [[11, 9]], "text": "87-92", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [457.8005065917969, 522.455810546875, 477.5086669921875, 530.8304443359375], "spans": [[11, 10]], "text": "71-79", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [487.4703369140625, 522.455810546875, 507.1784973144531, 530.8304443359375], "spans": [[11, 11]], "text": "87-95", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": 
[104.82499694824219, 511.496826171875, 121.81632995605469, 519.8714599609375], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [182.03500366210938, 511.496826171875, 198.71250915527344, 519.8714599609375], "spans": [[12, 1]], "text": "5071", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [219.21099853515625, 511.496826171875, 233.69174194335938, 519.8714599609375], "spans": [[12, 2]], "text": "0.47", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [250.01956176757812, 511.496826171875, 264.50030517578125, 519.8714599609375], "spans": [[12, 3]], "text": "0.30", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [280.828125, 511.496826171875, 295.3088684082031, 519.8714599609375], "spans": [[12, 4]], "text": "0.50", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [305.27301025390625, 511.496826171875, 324.9811706542969, 519.8714599609375], "spans": [[12, 5]], "text": "60-72", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [334.9428405761719, 511.496826171875, 354.6510009765625, 519.8714599609375], "spans": [[12, 6]], "text": "24-63", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [364.6126708984375, 511.496826171875, 384.3208312988281, 519.8714599609375], "spans": [[12, 7]], "text": "50-63", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [394.2825012207031, 
511.496826171875, 418.1600341796875, 519.8714599609375], "spans": [[12, 8]], "text": "94-100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [428.1217041015625, 511.496826171875, 447.8298645019531, 519.8714599609375], "spans": [[12, 9]], "text": "82-96", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [457.8005065917969, 511.496826171875, 477.5086669921875, 519.8714599609375], "spans": [[12, 10]], "text": "68-79", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [487.4703369140625, 511.496826171875, 507.1784973144531, 519.8714599609375], "spans": [[12, 11]], "text": "24-56", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [104.82499694824219, 500.1388244628906, 123.43028259277344, 508.5134582519531], "spans": [[13, 0]], "text": "Total", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [169.52699279785156, 500.1388244628906, 198.71263122558594, 508.5134582519531], "spans": [[13, 1]], "text": "1107470", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [208.6750030517578, 500.1388244628906, 233.69125366210938, 508.5134582519531], "spans": [[13, 2]], "text": "941123", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [243.65292358398438, 500.1388244628906, 264.49981689453125, 508.5134582519531], "spans": [[13, 3]], "text": "99816", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [274.46148681640625, 
500.1388244628906, 295.3083801269531, 508.5134582519531], "spans": [[13, 4]], "text": "66531", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [305.27301025390625, 500.1388244628906, 324.9811706542969, 508.5134582519531], "spans": [[13, 5]], "text": "82-83", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [334.9428405761719, 500.1388244628906, 354.6510009765625, 508.5134582519531], "spans": [[13, 6]], "text": "71-74", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [364.6126708984375, 500.1388244628906, 384.3208312988281, 508.5134582519531], "spans": [[13, 7]], "text": "79-81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [398.4518737792969, 500.1388244628906, 418.1600341796875, 508.5134582519531], "spans": [[13, 8]], "text": "89-94", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [428.1217041015625, 500.1388244628906, 447.8298645019531, 508.5134582519531], "spans": [[13, 9]], "text": "86-91", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [457.8005065917969, 500.1388244628906, 477.5086669921875, 508.5134582519531], "spans": [[13, 10]], "text": "71-76", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [487.4703369140625, 500.1388244628906, 507.1784973144531, 508.5134582519531], "spans": [[13, 11]], "text": "68-85", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null}, {"bounding-box": null, "prov": 
[{"bbox": [61.93328094482422, 440.30438232421875, 285.75616455078125, 596.587158203125], "page": 6, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", "type": "table", "#-cols": 6, "#-rows": 14, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [132.36500549316406, 585.65185546875, 157.99098205566406, 594.0264892578125], "spans": [[0, 1], [1, 1]], "text": "human", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [173.5050048828125, 585.65185546875, 204.618408203125, 594.0264892578125], "spans": [[0, 2], [0, 3]], "text": "MRCNN", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [173.5050048828125, 585.65185546875, 204.618408203125, 594.0264892578125], "spans": [[0, 2], [0, 3]], "text": "MRCNN", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [220.13027954101562, 585.65185546875, 248.069580078125, 594.0264892578125], "spans": [[0, 4]], "text": "FRCNN", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [258.03125, 585.65185546875, 280.1782531738281, 594.0264892578125], "spans": [[0, 5]], "text": "YOLO", "type": 
"col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [132.36500549316406, 585.65185546875, 157.99098205566406, 594.0264892578125], "spans": [[0, 1], [1, 1]], "text": "human", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [168.39300537109375, 574.6928100585938, 181.9950408935547, 583.0674438476562], "spans": [[1, 2]], "text": "R50", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.39605712890625, 574.6928100585938, 210.16746520996094, 583.0674438476562], "spans": [[1, 3]], "text": "R101", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [225.2130889892578, 574.6928100585938, 242.9844970703125, 583.0674438476562], "spans": [[1, 4]], "text": "R101", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [260.5137939453125, 574.6928100585938, 277.702392578125, 583.0674438476562], "spans": [[1, 5]], "text": "v5x6", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [67.66300201416016, 563.3358154296875, 96.8486328125, 571.71044921875], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [135.32400512695312, 563.3358154296875, 155.0321502685547, 571.71044921875], "spans": [[2, 1]], "text": "84-89", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, 
"row-span": [2, 3]}, {"bbox": [167.95399475097656, 563.3358154296875, 182.43472290039062, 571.71044921875], "spans": [[2, 2]], "text": "68.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [194.04620361328125, 563.3358154296875, 208.52694702148438, 571.71044921875], "spans": [[2, 3]], "text": "71.5", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [226.8632354736328, 563.3358154296875, 241.34396362304688, 571.71044921875], "spans": [[2, 4]], "text": "70.1", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [261.8680419921875, 563.3358154296875, 276.3487854003906, 571.71044921875], "spans": [[2, 5]], "text": "77.7", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [67.66300201416016, 552.3768310546875, 100.16619873046875, 560.75146484375], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [135.32400512695312, 552.3768310546875, 155.0321502685547, 560.75146484375], "spans": [[3, 1]], "text": "83-91", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [167.95399475097656, 552.3768310546875, 182.43472290039062, 560.75146484375], "spans": [[3, 2]], "text": "70.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [194.04620361328125, 552.3768310546875, 208.52694702148438, 560.75146484375], "spans": [[3, 3]], "text": "71.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [226.8632354736328, 
552.3768310546875, 241.34396362304688, 560.75146484375], "spans": [[3, 4]], "text": "73.7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [261.8680419921875, 552.3768310546875, 276.3487854003906, 560.75146484375], "spans": [[3, 5]], "text": "77.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [67.66300201416016, 541.4178466796875, 98.1756591796875, 549.79248046875], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [135.32400512695312, 541.4178466796875, 155.0321502685547, 549.79248046875], "spans": [[4, 1]], "text": "83-85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [167.95399475097656, 541.4178466796875, 182.43472290039062, 549.79248046875], "spans": [[4, 2]], "text": "60.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [194.04620361328125, 541.4178466796875, 208.52694702148438, 549.79248046875], "spans": [[4, 3]], "text": "63.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [226.8632354736328, 541.4178466796875, 241.34396362304688, 549.79248046875], "spans": [[4, 4]], "text": "63.5", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [261.8680419921875, 541.4178466796875, 276.3487854003906, 549.79248046875], "spans": [[4, 5]], "text": "66.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [67.66300201416016, 530.4588012695312, 100.54279327392578, 538.8334350585938], "spans": 
[[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [135.32400512695312, 530.4588012695312, 155.0321502685547, 538.8334350585938], "spans": [[5, 1]], "text": "87-88", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [167.95399475097656, 530.4588012695312, 182.43472290039062, 538.8334350585938], "spans": [[5, 2]], "text": "81.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [194.04620361328125, 530.4588012695312, 208.52694702148438, 538.8334350585938], "spans": [[5, 3]], "text": "80.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [226.8632354736328, 530.4588012695312, 241.34396362304688, 538.8334350585938], "spans": [[5, 4]], "text": "81.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [261.8680419921875, 530.4588012695312, 276.3487854003906, 538.8334350585938], "spans": [[5, 5]], "text": "86.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [67.66300201416016, 519.4998168945312, 110.19064331054688, 527.8744506835938], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [135.32400512695312, 519.4998168945312, 155.0321502685547, 527.8744506835938], "spans": [[6, 1]], "text": "93-94", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [167.95399475097656, 519.4998168945312, 182.43472290039062, 527.8744506835938], "spans": [[6, 2]], "text": "61.6", "type": 
"body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [194.04620361328125, 519.4998168945312, 208.52694702148438, 527.8744506835938], "spans": [[6, 3]], "text": "59.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [226.8632354736328, 519.4998168945312, 241.34396362304688, 527.8744506835938], "spans": [[6, 4]], "text": "58.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [261.8680419921875, 519.4998168945312, 276.3487854003906, 527.8744506835938], "spans": [[6, 5]], "text": "61.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [67.66300201416016, 508.54083251953125, 112.94332122802734, 516.9154663085938], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [135.32400512695312, 508.54083251953125, 155.0321502685547, 516.9154663085938], "spans": [[7, 1]], "text": "85-89", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [167.95399475097656, 508.54083251953125, 182.43472290039062, 516.9154663085938], "spans": [[7, 2]], "text": "71.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [194.04620361328125, 508.54083251953125, 208.52694702148438, 516.9154663085938], "spans": [[7, 3]], "text": "70.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [226.8632354736328, 508.54083251953125, 241.34396362304688, 516.9154663085938], "spans": [[7, 4]], "text": "72.0", "type": "body", "col": 4, "col-header": false, 
"col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [261.8680419921875, 508.54083251953125, 276.3487854003906, 516.9154663085938], "spans": [[7, 5]], "text": "67.9", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [67.66300201416016, 497.5818176269531, 93.64762878417969, 505.9564514160156], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [135.32400512695312, 497.5818176269531, 155.0321502685547, 505.9564514160156], "spans": [[8, 1]], "text": "69-71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [167.95399475097656, 497.5818176269531, 182.43472290039062, 505.9564514160156], "spans": [[8, 2]], "text": "71.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [194.04620361328125, 497.5818176269531, 208.52694702148438, 505.9564514160156], "spans": [[8, 3]], "text": "72.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [226.8632354736328, 497.5818176269531, 241.34396362304688, 505.9564514160156], "spans": [[8, 4]], "text": "72.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [261.8680419921875, 497.5818176269531, 276.3487854003906, 505.9564514160156], "spans": [[8, 5]], "text": "77.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [67.66300201416016, 486.6228332519531, 122.40287780761719, 494.9974670410156], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, 
"row-header": false, "row-span": [9, 10]}, {"bbox": [135.32400512695312, 486.6228332519531, 155.0321502685547, 494.9974670410156], "spans": [[9, 1]], "text": "83-84", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [167.95399475097656, 486.6228332519531, 182.43472290039062, 494.9974670410156], "spans": [[9, 2]], "text": "67.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [194.04620361328125, 486.6228332519531, 208.52694702148438, 494.9974670410156], "spans": [[9, 3]], "text": "69.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [226.8632354736328, 486.6228332519531, 241.34396362304688, 494.9974670410156], "spans": [[9, 4]], "text": "68.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [261.8680419921875, 486.6228332519531, 276.3487854003906, 494.9974670410156], "spans": [[9, 5]], "text": "74.6", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [67.66300201416016, 475.663818359375, 87.46977996826172, 484.0384521484375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [135.32400512695312, 475.663818359375, 155.0321502685547, 484.0384521484375], "spans": [[10, 1]], "text": "77-81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [167.95399475097656, 475.663818359375, 182.43472290039062, 484.0384521484375], "spans": [[10, 2]], "text": "82.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 
11]}, {"bbox": [194.04620361328125, 475.663818359375, 208.52694702148438, 484.0384521484375], "spans": [[10, 3]], "text": "82.9", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [226.8632354736328, 475.663818359375, 241.34396362304688, 484.0384521484375], "spans": [[10, 4]], "text": "82.2", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [261.8680419921875, 475.663818359375, 276.3487854003906, 484.0384521484375], "spans": [[10, 5]], "text": "86.3", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [67.66300201416016, 464.7058410644531, 83.62319946289062, 473.0804748535156], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [135.32400512695312, 464.7058410644531, 155.0321502685547, 473.0804748535156], "spans": [[11, 1]], "text": "84-86", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [167.95399475097656, 464.7058410644531, 182.43472290039062, 473.0804748535156], "spans": [[11, 2]], "text": "84.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [194.04620361328125, 464.7058410644531, 208.52694702148438, 473.0804748535156], "spans": [[11, 3]], "text": "85.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [226.8632354736328, 464.7058410644531, 241.34396362304688, 473.0804748535156], "spans": [[11, 4]], "text": "85.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": 
[261.8680419921875, 464.7058410644531, 276.3487854003906, 473.0804748535156], "spans": [[11, 5]], "text": "88.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [67.66300201416016, 453.746826171875, 84.65432739257812, 462.1214599609375], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [135.32400512695312, 453.746826171875, 155.0321502685547, 462.1214599609375], "spans": [[12, 1]], "text": "60-72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [167.95399475097656, 453.746826171875, 182.43472290039062, 462.1214599609375], "spans": [[12, 2]], "text": "76.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [194.04620361328125, 453.746826171875, 208.52694702148438, 462.1214599609375], "spans": [[12, 3]], "text": "80.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [226.8632354736328, 453.746826171875, 241.34396362304688, 462.1214599609375], "spans": [[12, 4]], "text": "79.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [261.8680419921875, 453.746826171875, 276.3487854003906, 462.1214599609375], "spans": [[12, 5]], "text": "82.7", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [67.66300201416016, 442.3888244628906, 78.62890625, 450.7634582519531], "spans": [[13, 0]], "text": "All", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [135.32400512695312, 
442.3888244628906, 155.0321502685547, 450.7634582519531], "spans": [[13, 1]], "text": "82-83", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [167.95399475097656, 442.3888244628906, 182.43472290039062, 450.7634582519531], "spans": [[13, 2]], "text": "72.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [194.04620361328125, 442.3888244628906, 208.52694702148438, 450.7634582519531], "spans": [[13, 3]], "text": "73.5", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [226.8632354736328, 442.3888244628906, 241.34396362304688, 450.7634582519531], "spans": [[13, 4]], "text": "73.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [261.8680419921875, 442.3888244628906, 276.3487854003906, 450.7634582519531], "spans": [[13, 5]], "text": "76.8", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [80.5073471069336, 496.419189453125, 267.3428649902344, 640.9814453125], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. 
The reduced label sets were obtained by either down-mapping or dropping labels.", "type": "table", "#-cols": 5, "#-rows": 13, "data": [[{"bbox": [86.37200164794922, 630.5248413085938, 129.4645233154297, 638.8994750976562], "spans": [[0, 0]], "text": "Class-count", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [151.07400512695312, 630.5248413085938, 159.41275024414062, 638.8994750976562], "spans": [[0, 1]], "text": "11", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [179.3181610107422, 630.5248413085938, 183.48753356933594, 638.8994750976562], "spans": [[0, 2]], "text": "6", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [213.33668518066406, 630.5248413085938, 217.5060577392578, 638.8994750976562], "spans": [[0, 3]], "text": "5", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [247.35520935058594, 630.5248413085938, 251.5245819091797, 638.8994750976562], "spans": [[0, 4]], "text": "4", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [86.37200164794922, 619.1678466796875, 115.55763244628906, 627.54248046875], "spans": [[1, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [151.07400512695312, 619.1678466796875, 159.41275024414062, 627.54248046875], "spans": [[1, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [173.42723083496094, 619.1678466796875, 189.38742065429688, 627.54248046875], "spans": [[1, 2]], "text": "Text", "type": "body", 
"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [207.4457550048828, 619.1678466796875, 223.40594482421875, 627.54248046875], "spans": [[1, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [241.4642791748047, 619.1678466796875, 257.4244689941406, 627.54248046875], "spans": [[1, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [86.37200164794922, 608.2088012695312, 118.87519836425781, 616.5834350585938], "spans": [[2, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [151.07400512695312, 608.2088012695312, 159.41275024414062, 616.5834350585938], "spans": [[2, 1]], "text": "71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [173.42723083496094, 608.2088012695312, 189.38742065429688, 616.5834350585938], "spans": [[2, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [207.4457550048828, 608.2088012695312, 223.40594482421875, 616.5834350585938], "spans": [[2, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [241.4642791748047, 608.2088012695312, 257.4244689941406, 616.5834350585938], "spans": [[2, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [86.37200164794922, 597.2498168945312, 116.88465881347656, 605.6244506835938], "spans": [[3, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 
1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [151.07400512695312, 597.2498168945312, 159.41275024414062, 605.6244506835938], "spans": [[3, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [173.42723083496094, 597.2498168945312, 189.38742065429688, 605.6244506835938], "spans": [[3, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [207.4457550048828, 597.2498168945312, 223.40594482421875, 605.6244506835938], "spans": [[3, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [241.4642791748047, 597.2498168945312, 257.4244689941406, 605.6244506835938], "spans": [[3, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [86.37200164794922, 586.2908325195312, 119.25179290771484, 594.6654663085938], "spans": [[4, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [151.07400512695312, 586.2908325195312, 159.41275024414062, 594.6654663085938], "spans": [[4, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [173.42723083496094, 586.2908325195312, 189.38742065429688, 594.6654663085938], "spans": [[4, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [211.2564697265625, 586.2908325195312, 219.59521484375, 594.6654663085938], "spans": [[4, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, 
{"bbox": [241.46426391601562, 586.2908325195312, 257.4244689941406, 594.6654663085938], "spans": [[4, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [86.37200164794922, 575.3318481445312, 128.89964294433594, 583.7064819335938], "spans": [[5, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [151.07400512695312, 575.3318481445312, 159.41275024414062, 583.7064819335938], "spans": [[5, 1]], "text": "62", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [177.23794555664062, 575.3318481445312, 185.57669067382812, 583.7064819335938], "spans": [[5, 2]], "text": "62", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [213.9105224609375, 575.3318481445312, 216.941162109375, 583.7064819335938], "spans": [[5, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [247.92904663085938, 575.3318481445312, 250.95968627929688, 583.7064819335938], "spans": [[5, 4]], "text": "-", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [86.37200164794922, 564.372802734375, 131.65231323242188, 572.7474365234375], "spans": [[6, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [151.07400512695312, 564.372802734375, 159.41275024414062, 572.7474365234375], "spans": [[6, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [177.23794555664062, 
564.372802734375, 185.57669067382812, 572.7474365234375], "spans": [[6, 2]], "text": "68", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [213.9105224609375, 564.372802734375, 216.941162109375, 572.7474365234375], "spans": [[6, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [247.92904663085938, 564.372802734375, 250.95968627929688, 572.7474365234375], "spans": [[6, 4]], "text": "-", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [86.37200164794922, 553.413818359375, 112.35662841796875, 561.7884521484375], "spans": [[7, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [151.07400512695312, 553.413818359375, 159.41275024414062, 561.7884521484375], "spans": [[7, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [177.23794555664062, 553.413818359375, 185.57669067382812, 561.7884521484375], "spans": [[7, 2]], "text": "72", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [211.25645446777344, 553.413818359375, 219.59519958496094, 561.7884521484375], "spans": [[7, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [245.27496337890625, 553.413818359375, 253.61370849609375, 561.7884521484375], "spans": [[7, 4]], "text": "72", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [86.37200164794922, 542.455810546875, 141.11187744140625, 550.8304443359375], "spans": [[8, 
0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [151.07400512695312, 542.455810546875, 159.41275024414062, 550.8304443359375], "spans": [[8, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [177.23794555664062, 542.455810546875, 185.57669067382812, 550.8304443359375], "spans": [[8, 2]], "text": "67", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [211.25645446777344, 542.455810546875, 219.59519958496094, 550.8304443359375], "spans": [[8, 3]], "text": "69", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [245.27496337890625, 542.455810546875, 253.61370849609375, 550.8304443359375], "spans": [[8, 4]], "text": "68", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [86.37200164794922, 531.496826171875, 106.17877960205078, 539.8714599609375], "spans": [[9, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [151.07400512695312, 531.496826171875, 159.41275024414062, 539.8714599609375], "spans": [[9, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [177.23794555664062, 531.496826171875, 185.57669067382812, 539.8714599609375], "spans": [[9, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [211.25645446777344, 531.496826171875, 219.59519958496094, 539.8714599609375], "spans": [[9, 3]], "text": "82", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [245.27496337890625, 531.496826171875, 253.61370849609375, 539.8714599609375], "spans": [[9, 4]], "text": "82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [86.37200164794922, 520.537841796875, 102.33219909667969, 528.9124755859375], "spans": [[10, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [151.07400512695312, 520.537841796875, 159.41275024414062, 528.9124755859375], "spans": [[10, 1]], "text": "85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [177.23794555664062, 520.537841796875, 185.57669067382812, 528.9124755859375], "spans": [[10, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [211.25645446777344, 520.537841796875, 219.59519958496094, 528.9124755859375], "spans": [[10, 3]], "text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [245.27496337890625, 520.537841796875, 253.61370849609375, 528.9124755859375], "spans": [[10, 4]], "text": "84", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [86.37200164794922, 509.5788269042969, 103.36332702636719, 517.9534301757812], "spans": [[11, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [151.07400512695312, 509.5788269042969, 159.41275024414062, 517.9534301757812], "spans": [[11, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [169.37442016601562, 509.5788269042969, 193.4312744140625, 517.9534301757812], "spans": [[11, 2]], "text": "Sec.-h.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [203.3929443359375, 509.5788269042969, 227.44979858398438, 517.9534301757812], "spans": [[11, 3]], "text": "Sec.-h.", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [237.41146850585938, 509.5788269042969, 261.46832275390625, 517.9534301757812], "spans": [[11, 4]], "text": "Sec.-h.", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [86.37200164794922, 498.2208251953125, 113.3160171508789, 506.595458984375], "spans": [[12, 0]], "text": "Overall", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [151.07400512695312, 498.2208251953125, 159.41275024414062, 506.595458984375], "spans": [[12, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [177.23794555664062, 498.2208251953125, 185.57669067382812, 506.595458984375], "spans": [[12, 2]], "text": "73", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [211.25645446777344, 498.2208251953125, 219.59519958496094, 506.595458984375], "spans": [[12, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [245.27496337890625, 498.2208251953125, 253.61370849609375, 506.595458984375], "spans": [[12, 4]], "text": "77", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], 
"row": 12, "row-header": false, "row-span": [12, 13]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [353.065185546875, 485.2873840332031, 523.3069458007812, 641.25341796875], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement.", "type": "table", "#-cols": 5, "#-rows": 14, "data": [[{"bbox": [358.6390075683594, 630.5248413085938, 401.7315368652344, 638.8994750976562], "spans": [[0, 0]], "text": "Class-count", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [440.2250061035156, 630.5248413085938, 448.5637512207031, 638.8994750976562], "spans": [[0, 1], [0, 2]], "text": "11", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [440.2250061035156, 630.5248413085938, 448.5637512207031, 638.8994750976562], "spans": [[0, 1], [0, 2]], "text": "11", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [494.3800048828125, 630.5248413085938, 498.54937744140625, 638.8994750976562], "spans": [[0, 3], [0, 4]], "text": "5", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [494.3800048828125, 630.5248413085938, 498.54937744140625, 638.8994750976562], "spans": [[0, 3], [0, 4]], "text": "5", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [358.6390075683594, 619.5658569335938, 375.27166748046875, 627.9404907226562], "spans": [[1, 0]], "text": "Split", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": 
[423.34100341796875, 619.5658569335938, 438.0458984375, 627.9404907226562], "spans": [[1, 1]], "text": "Doc", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [448.007568359375, 619.5658569335938, 465.44720458984375, 627.9404907226562], "spans": [[1, 2]], "text": "Page", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [475.4110107421875, 619.5658569335938, 490.11590576171875, 627.9404907226562], "spans": [[1, 3]], "text": "Doc", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [500.07757568359375, 619.5658569335938, 517.5172119140625, 627.9404907226562], "spans": [[1, 4]], "text": "Page", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [358.6390075683594, 608.2088012695312, 387.82464599609375, 616.5834350585938], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [426.52398681640625, 608.2088012695312, 434.86273193359375, 616.5834350585938], "spans": [[2, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [452.5624084472656, 608.2088012695312, 460.9011535644531, 616.5834350585938], "spans": [[2, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, 
"row-header": false, "row-span": [2, 3]}], [{"bbox": [358.6390075683594, 597.2498168945312, 391.1422119140625, 605.6244506835938], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [426.52398681640625, 597.2498168945312, 434.86273193359375, 605.6244506835938], "spans": [[3, 1]], "text": "71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [452.5624084472656, 597.2498168945312, 460.9011535644531, 605.6244506835938], "spans": [[3, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [358.6390075683594, 586.2908325195312, 389.15167236328125, 594.6654663085938], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [426.52398681640625, 586.2908325195312, 434.86273193359375, 594.6654663085938], "spans": [[4, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [452.5624084472656, 586.2908325195312, 460.9011535644531, 594.6654663085938], "spans": [[4, 2]], "text": "66", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 
5]}, {"bbox": null, "spans": [[4, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [358.6390075683594, 575.3318481445312, 391.518798828125, 583.7064819335938], "spans": [[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [426.52398681640625, 575.3318481445312, 434.86273193359375, 583.7064819335938], "spans": [[5, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [452.5624084472656, 575.3318481445312, 460.9011535644531, 583.7064819335938], "spans": [[5, 2]], "text": "88", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [478.593994140625, 575.3318481445312, 486.9327392578125, 583.7064819335938], "spans": [[5, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [504.6324157714844, 575.3318481445312, 512.97119140625, 583.7064819335938], "spans": [[5, 4]], "text": "88", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [358.6390075683594, 564.372802734375, 401.1666564941406, 572.7474365234375], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [426.52398681640625, 564.372802734375, 434.86273193359375, 572.7474365234375], "spans": [[6, 1]], "text": "62", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [452.5624084472656, 564.372802734375, 460.9011535644531, 572.7474365234375], "spans": [[6, 2]], "text": "89", "type": 
"body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [358.6390075683594, 553.413818359375, 403.9193115234375, 561.7884521484375], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [426.52398681640625, 553.413818359375, 434.86273193359375, 561.7884521484375], "spans": [[7, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [452.5624084472656, 553.413818359375, 460.9011535644531, 561.7884521484375], "spans": [[7, 2]], "text": "90", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [358.6390075683594, 542.455810546875, 384.6236572265625, 550.8304443359375], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [426.52398681640625, 542.455810546875, 434.86273193359375, 550.8304443359375], "spans": [[8, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, 
{"bbox": [452.5624084472656, 542.455810546875, 460.9011535644531, 550.8304443359375], "spans": [[8, 2]], "text": "82", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [478.593994140625, 542.455810546875, 486.9327392578125, 550.8304443359375], "spans": [[8, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [504.6324157714844, 542.455810546875, 512.97119140625, 550.8304443359375], "spans": [[8, 4]], "text": "82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [358.6390075683594, 531.496826171875, 413.37890625, 539.8714599609375], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [426.52398681640625, 531.496826171875, 434.86273193359375, 539.8714599609375], "spans": [[9, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [452.5624084472656, 531.496826171875, 460.9011535644531, 539.8714599609375], "spans": [[9, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [478.593994140625, 531.496826171875, 486.9327392578125, 539.8714599609375], "spans": [[9, 3]], "text": "69", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [504.6324157714844, 531.496826171875, 512.97119140625, 539.8714599609375], "spans": [[9, 4]], "text": "83", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [358.6390075683594, 520.537841796875, 378.4457702636719, 
528.9124755859375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [426.52398681640625, 520.537841796875, 434.86273193359375, 528.9124755859375], "spans": [[10, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [452.5624084472656, 520.537841796875, 460.9011535644531, 528.9124755859375], "spans": [[10, 2]], "text": "89", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [478.593994140625, 520.537841796875, 486.9327392578125, 528.9124755859375], "spans": [[10, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [504.6324157714844, 520.537841796875, 512.97119140625, 528.9124755859375], "spans": [[10, 4]], "text": "90", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [358.6390075683594, 509.5788269042969, 374.5992126464844, 517.9534301757812], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [426.52398681640625, 509.5788269042969, 434.86273193359375, 517.9534301757812], "spans": [[11, 1]], "text": "85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [452.5624084472656, 509.5788269042969, 460.9011535644531, 517.9534301757812], "spans": [[11, 2]], "text": "91", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [478.593994140625, 509.5788269042969, 486.9327392578125, 517.9534301757812], "spans": [[11, 3]], 
"text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [504.6324157714844, 509.5788269042969, 512.97119140625, 517.9534301757812], "spans": [[11, 4]], "text": "90", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [358.6390075683594, 498.6198425292969, 375.6303405761719, 506.9944763183594], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [426.52398681640625, 498.6198425292969, 434.86273193359375, 506.9944763183594], "spans": [[12, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [452.5624084472656, 498.6198425292969, 460.9011535644531, 506.9944763183594], "spans": [[12, 2]], "text": "81", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [358.6390075683594, 487.2628173828125, 369.60491943359375, 495.637451171875], "spans": [[13, 0]], "text": "All", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [426.52398681640625, 487.2628173828125, 434.86273193359375, 495.637451171875], "spans": [[13, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [452.5624084472656, 
487.2628173828125, 460.9011535644531, 495.637451171875], "spans": [[13, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [478.593994140625, 487.2628173828125, 486.9327392578125, 495.637451171875], "spans": [[13, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [504.6324157714844, 487.2628173828125, 512.97119140625, 495.637451171875], "spans": [[13, 4]], "text": "87", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [72.87370300292969, 452.12615966796875, 274.87945556640625, 619.3699951171875], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. 
By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.", "type": "table", "#-cols": 4, "#-rows": 15, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 3, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [154.62899780273438, 597.6488037109375, 175.4758758544922, 606.0234375], "spans": [[1, 0]], "text": "labels", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [204.69000244140625, 597.6488037109375, 220.5426025390625, 606.0234375], "spans": [[1, 1]], "text": "PLN", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [230.5042724609375, 597.6488037109375, 242.0619659423828, 606.0234375], "spans": [[1, 2]], "text": "DB", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [252.0236358642578, 597.6488037109375, 269.31085205078125, 
606.0234375], "spans": [[1, 3]], "text": "DLN", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [154.62899780273438, 586.2908325195312, 177.9237060546875, 594.6654663085938], "spans": [[2, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [208.44700622558594, 586.2908325195312, 216.78575134277344, 594.6654663085938], "spans": [[2, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [232.11830139160156, 586.2908325195312, 240.45704650878906, 594.6654663085938], "spans": [[2, 2]], "text": "43", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [256.4979248046875, 586.2908325195312, 264.836669921875, 594.6654663085938], "spans": [[2, 3]], "text": "23", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [154.62899780273438, 575.3318481445312, 194.72674560546875, 583.7064819335938], "spans": [[3, 0]], "text": "Sec-header", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [208.44700622558594, 575.3318481445312, 216.78575134277344, 583.7064819335938], "spans": [[3, 1]], "text": "87", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [234.77235412597656, 575.3318481445312, 237.80299377441406, 583.7064819335938], "spans": [[3, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [256.4979248046875, 575.3318481445312, 264.836669921875, 583.7064819335938], "spans": [[3, 3]], "text": "32", "type": 
"body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [154.62899780273438, 564.372802734375, 174.43577575683594, 572.7474365234375], "spans": [[4, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [208.44700622558594, 564.372802734375, 216.78575134277344, 572.7474365234375], "spans": [[4, 1]], "text": "95", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [232.11830139160156, 564.372802734375, 240.45704650878906, 572.7474365234375], "spans": [[4, 2]], "text": "24", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [256.4979248046875, 564.372802734375, 264.836669921875, 572.7474365234375], "spans": [[4, 3]], "text": "49", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [154.62899780273438, 553.413818359375, 170.5891876220703, 561.7884521484375], "spans": [[5, 0]], "text": "Text", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [208.44700622558594, 553.413818359375, 216.78575134277344, 561.7884521484375], "spans": [[5, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [234.77235412597656, 553.413818359375, 237.80299377441406, 561.7884521484375], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [256.4979248046875, 553.413818359375, 264.836669921875, 561.7884521484375], "spans": [[5, 3]], "text": "42", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, 
"row-header": false, "row-span": [5, 6]}], [{"bbox": [154.62899780273438, 542.455810546875, 171.27960205078125, 550.8304443359375], "spans": [[6, 0]], "text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [208.44700622558594, 542.455810546875, 216.78575134277344, 550.8304443359375], "spans": [[6, 1]], "text": "93", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [232.11830139160156, 542.455810546875, 240.45704650878906, 550.8304443359375], "spans": [[6, 2]], "text": "34", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [256.4979248046875, 542.455810546875, 264.836669921875, 550.8304443359375], "spans": [[6, 3]], "text": "30", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [154.62899780273438, 531.0978393554688, 177.9237060546875, 539.4724731445312], "spans": [[7, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [208.44700622558594, 531.0978393554688, 216.78575134277344, 539.4724731445312], "spans": [[7, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [232.11830139160156, 531.0978393554688, 240.45704650878906, 539.4724731445312], "spans": [[7, 2]], "text": "71", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [256.4979248046875, 531.0978393554688, 264.836669921875, 539.4724731445312], "spans": [[7, 3]], "text": "31", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": 
[154.62899780273438, 520.1388549804688, 174.43577575683594, 528.5134887695312], "spans": [[8, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [208.44700622558594, 520.1388549804688, 216.78575134277344, 528.5134887695312], "spans": [[8, 1]], "text": "19", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [232.11830139160156, 520.1388549804688, 240.45704650878906, 528.5134887695312], "spans": [[8, 2]], "text": "65", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [256.4979248046875, 520.1388549804688, 264.836669921875, 528.5134887695312], "spans": [[8, 3]], "text": "22", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [154.62899780273438, 509.1798400878906, 171.27960205078125, 517.554443359375], "spans": [[9, 0]], "text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [208.44700622558594, 509.1798400878906, 216.78575134277344, 517.554443359375], "spans": [[9, 1]], "text": "48", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [232.11830139160156, 509.1798400878906, 240.45704650878906, 517.554443359375], "spans": [[9, 2]], "text": "68", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [256.4979248046875, 509.1798400878906, 264.836669921875, 517.554443359375], "spans": [[9, 3]], "text": "27", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [154.62899780273438, 497.82281494140625, 177.9237060546875, 
506.19744873046875], "spans": [[10, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [208.44700622558594, 497.82281494140625, 216.78575134277344, 506.19744873046875], "spans": [[10, 1]], "text": "67", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [232.11830139160156, 497.82281494140625, 240.45704650878906, 506.19744873046875], "spans": [[10, 2]], "text": "51", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [256.4979248046875, 497.82281494140625, 264.836669921875, 506.19744873046875], "spans": [[10, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [154.62899780273438, 486.86383056640625, 194.72674560546875, 495.23846435546875], "spans": [[11, 0]], "text": "Sec-header", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [208.44700622558594, 486.86383056640625, 216.78575134277344, 495.23846435546875], "spans": [[11, 1]], "text": "53", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [234.77235412597656, 486.86383056640625, 237.80299377441406, 495.23846435546875], "spans": [[11, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [256.4979248046875, 486.86383056640625, 264.836669921875, 495.23846435546875], "spans": [[11, 3]], "text": "68", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [154.62899780273438, 475.9048156738281, 174.43577575683594, 
484.2794494628906], "spans": [[12, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [208.44700622558594, 475.9048156738281, 216.78575134277344, 484.2794494628906], "spans": [[12, 1]], "text": "87", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [232.11830139160156, 475.9048156738281, 240.45704650878906, 484.2794494628906], "spans": [[12, 2]], "text": "43", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [256.4979248046875, 475.9048156738281, 264.836669921875, 484.2794494628906], "spans": [[12, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [154.62899780273438, 464.9458312988281, 170.5891876220703, 473.3204650878906], "spans": [[13, 0]], "text": "Text", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [208.44700622558594, 464.9458312988281, 216.78575134277344, 473.3204650878906], "spans": [[13, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [234.77235412597656, 464.9458312988281, 237.80299377441406, 473.3204650878906], "spans": [[13, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [256.4979248046875, 464.9458312988281, 264.836669921875, 473.3204650878906], "spans": [[13, 3]], "text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [154.62899780273438, 453.98681640625, 171.27960205078125, 462.3614501953125], "spans": [[14, 0]], 
"text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [208.44700622558594, 453.98681640625, 216.78575134277344, 462.3614501953125], "spans": [[14, 1]], "text": "59", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [232.11830139160156, 453.98681640625, 240.45704650878906, 462.3614501953125], "spans": [[14, 2]], "text": "47", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [256.4979248046875, 453.98681640625, 264.836669921875, 462.3614501953125], "spans": [[14, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 14, "row-header": false, "row-span": [14, 15]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2206.01062.pdf", 
"filename-prov": null, "document-hash": "5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc", "#-pages": 9, "collection-name": null, "description": null, "page-hashes": [{"hash": "3c76b6d3fd82865e42c51d5cbd7d1a9996dba7902643b919acc581e866b92716", "model": "default", "page": 1}, {"hash": "5ccfaddd314d3712cbabc857c8c0f33d1268341ce37b27089857cbf09f0522d4", "model": "default", "page": 2}, {"hash": "d2dc51ad0a01ee9486ffe248649ee1cd10ce35773de8e4b21abf30d310f4fc26", "model": "default", "page": 3}, {"hash": "310121977375f8f1106412189943bd70f121629b2b4d35394077233dedbfb041", "model": "default", "page": 4}, {"hash": "09fa72b602eb0640669844acabc17ef494802a4a9188aeaaf0e0131c496e6951", "model": "default", "page": 5}, {"hash": "ec3fa60f136f3d9f5fa790ab27f5d1c14e5622573c52377b909b591d0be0ea44", "model": "default", "page": 6}, {"hash": "ec1bc56fe581ce95615b1fab11c3ba8fc89662acf2f53446decd380a155b06dd", "model": "default", "page": 7}, {"hash": "fbd2b06876dddc19ee08e0a9751d978c03e6943b74bedf1d83d6528cd4f8954d", "model": "default", "page": 8}, {"hash": "6cfa4eb4410fa9972da289dbf8d8cc585d317a192e1214c778ddd7768e98f311", "model": "default", "page": 9}]}, "main-text": [{"prov": [{"bbox": [107.30000305175781, 672.3833618164062, 505.1857604980469, 709.082275390625], "page": 1, "span": [0, 71], "__ref_s3_data": null}], "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [90.94670867919922, 611.2825317382812, 193.91998291015625, 658.7803344726562], "page": 1, "span": [0, 73], "__ref_s3_data": null}], "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [254.97935485839844, 611.7597045898438, 357.8802490234375, 658.7174072265625], "page": 1, "span": [0, 71], "__ref_s3_data": null}], "text": "Christoph Auer IBM Research Rueschlikon, Switzerland 
cau@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [419.0672302246094, 611.7597045898438, 522.0595703125, 658.9878540039062], "page": 1, "span": [0, 70], "__ref_s3_data": null}], "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [171.90907287597656, 553.3746948242188, 275.3072509765625, 600.1580200195312], "page": 1, "span": [0, 72], "__ref_s3_data": null}], "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [336.5292053222656, 553.3746948242188, 439.84405517578125, 599.942626953125], "page": 1, "span": [0, 68], "__ref_s3_data": null}], "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.33011245727539, 533.9879760742188, 112.2127456665039, 544.47509765625], "page": 1, "span": [0, 8], "__ref_s3_data": null}], "text": "ABSTRACT", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [52.857933044433594, 257.10565185546875, 295.5601806640625, 529.5941162109375], "page": 1, "span": [0, 1595], "__ref_s3_data": null}], "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. 
In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.36912155151367, 230.69398498535156, 134.81988525390625, 241.21551513671875], "page": 1, "span": [0, 12], "__ref_s3_data": null}], "text": "CCS CONCEPTS", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [53.02470016479492, 194.8704071044922, 297.8529357910156, 226.241455078125], "page": 1, "span": [0, 170], "__ref_s3_data": null}], "text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; Object detection ;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.33460235595703, 117.82738494873047, 295.11798095703125, 158.33511352539062], "page": 1, "span": [0, 397], "__ref_s3_data": null}], "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not 
made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.31700134277344, 85.73310852050781, 197.8627471923828, 116.91976928710938], "page": 1, "span": [0, 168], "__ref_s3_data": null}], "text": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.2291564941406, 232.3291473388672, 559.8057861328125, 252.12974548339844], "page": 1, "span": [0, 84], "__ref_s3_data": null}], "text": "Figure 1: Four examples of complex page layouts across different document categories", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"prov": [{"bbox": [317.11431884765625, 189.22499084472656, 379.82049560546875, 199.97215270996094], "page": 1, "span": [0, 8], "__ref_s3_data": null}], "text": "KEYWORDS", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.2037658691406, 164.9988250732422, 559.2164306640625, 184.67845153808594], "page": 1, "span": [0, 90], "__ref_s3_data": null}], "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.3434753417969, 144.41390991210938, 404.6536560058594, 152.36439514160156], "page": 1, "span": [0, 21], "__ref_s3_data": null}], "text": "ACM Reference Format:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.1117248535156, 84.62297058105469, 559.5494995117188, 142.41151428222656], "page": 1, "span": [0, 374], "__ref_s3_data": 
null}], "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.19501876831055, 722.7692260742188, 558.4357299804688, 732.1524047851562], "page": 2, "span": [0, 130], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [53.79800033569336, 695.8309936523438, 156.52899169921875, 706.4523315429688], "page": 2, "span": [0, 14], "__ref_s3_data": null}], "text": "1 INTRODUCTION", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [52.80397415161133, 562.986572265625, 303.1766357421875, 681.3472290039062], "page": 2, "span": [0, 702], "__ref_s3_data": null}], "text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. 
To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [52.89326477050781, 289.0808410644531, 295.5641174316406, 561.2902221679688], "page": 2, "span": [0, 1580], "__ref_s3_data": null}], "text": "A key problem in the process of document conversion is to understand the structure of a single document page, i.e. which segments of text should be grouped together in a unit. To train models for this task, there are currently two large datasets available to the community, PubLayNet [6] and DocBank [7]. They were introduced in 2019 and 2020 respectively and significantly accelerated the implementation of layout detection and segmentation models due to their sizes of 300K and 500K ground-truth pages. These sizes were achieved by leveraging an automation approach. The benefit of automated ground-truth generation is obvious: one can generate large ground-truth datasets at virtually no cost. However, the automation introduces a constraint on the variability in the dataset, because corresponding structured source data must be available. PubLayNet and DocBank were both generated from scientific document repositories (PubMed and arXiv), which provide XML or L A T E X sources. Those scientific documents present a limited variability in their layouts, because they are typeset in uniform templates provided by the publishers. Obviously, documents such as technical manuals, annual company reports, legal text, government tenders, etc. have very different and partially unique layouts. As a consequence, the layout predictions obtained from models trained on PubLayNet or DocBank is very reasonable when applied on scientific documents. 
However, for more artistic or free-style layouts, we see sub-par prediction quality from these models, which we demonstrate in Section 5.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.12458419799805, 212.36782836914062, 295.56396484375, 287.0208740234375], "page": 2, "span": [0, 462], "__ref_s3_data": null}], "text": "In this paper, we present the DocLayNet dataset. It provides pageby-page layout annotation ground-truth using bounding-boxes for 11 distinct class labels on 80863 unique document pages, of which a fraction carry double- or triple-annotations. DocLayNet is similar in spirit to PubLayNet and DocBank and will likewise be made available to the public 1 in order to stimulate the document-layout analysis community. It distinguishes itself in the following aspects:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.64593505859375, 176.96405029296875, 295.5616455078125, 208.28524780273438], "page": 2, "span": [0, 149], "__ref_s3_data": null}], "text": "(1) Human Annotation : In contrast to PubLayNet and DocBank, we relied on human annotation instead of automation approaches to generate the data set.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.50244140625, 154.92233276367188, 294.3029479980469, 174.95782470703125], "page": 2, "span": [0, 109], "__ref_s3_data": null}], "text": "(2) Large Layout Variability : We include diverse and complex layouts from a large variety of public sources.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.18266296386719, 121.99307250976562, 294.6838073730469, 153.57122802734375], "page": 2, "span": [0, 180], "__ref_s3_data": null}], "text": "(3) Detailed Label Set : We define 11 class labels to distinguish layout features in high detail. 
PubLayNet provides 5 labels; DocBank provides 13, although not a superset of ours.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.30329132080078, 99.92230987548828, 295.56439208984375, 120.3491439819336], "page": 2, "span": [0, 115], "__ref_s3_data": null}], "text": "(4) Redundant Annotations : A fraction of the pages in the DocLayNet data set carry more than one human annotation.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.60314178466797, 82.76702880859375, 216.05824279785156, 90.63584899902344], "page": 2, "span": [0, 60], "__ref_s3_data": null}], "text": "$^{1}$https://developer.ibm.com/exchanges/data/all/doclaynet", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [341.2403564453125, 685.3028564453125, 558.5009765625, 705.5034790039062], "page": 2, "span": [0, 86], "__ref_s3_data": null}], "text": "This enables experimentation with annotation uncertainty and quality control analysis.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [328.06146240234375, 630.4351806640625, 559.7210083007812, 683.4995727539062], "page": 2, "span": [0, 280], "__ref_s3_data": null}], "text": "(5) Pre-defined Train-, Test- & Validation-set : Like DocBank, we provide fixed train-, test- & validation-sets to ensure proportional representation of the class-labels. Further, we prevent leakage of unique layouts across sets, which has a large effect on model accuracy scores.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.0706787109375, 571.292724609375, 559.1903076171875, 624.9239501953125], "page": 2, "span": [0, 297], "__ref_s3_data": null}], "text": "All aspects outlined above are detailed in Section 3. In Section 4, we will elaborate on how we designed and executed this large-scale human annotation campaign. 
We will also share key insights and lessons learned that might prove helpful for other parties planning to set up annotation campaigns.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [316.9918518066406, 483.6390686035156, 559.5819702148438, 569.6455078125], "page": 2, "span": [0, 506], "__ref_s3_data": null}], "text": "In Section 5, we will present baseline accuracy numbers for a variety of object detection methods (Faster R-CNN, Mask R-CNN and YOLOv5) trained on DocLayNet. We further show how the model performance is impacted by varying the DocLayNet dataset size, reducing the label set and modifying the train/test-split. Last but not least, we compare the performance of models trained on PubLayNet, DocBank and DocLayNet and demonstrate that a model trained on DocLayNet provides overall more robust layout recovery.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.33935546875, 460.4820251464844, 422.0046081542969, 471.2471923828125], "page": 2, "span": [0, 14], "__ref_s3_data": null}], "text": "2 RELATED WORK", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [316.9687805175781, 327.7038269042969, 559.7161254882812, 446.38397216796875], "page": 2, "span": [0, 655], "__ref_s3_data": null}], "text": "While early approaches in document-layout analysis used rulebased algorithms and heuristics [8], the problem is lately addressed with deep learning methods. The most common approach is to leverage object detection models [9-15]. In the last decade, the accuracy and speed of these models has increased dramatically. Furthermore, most state-of-the-art object detection methods can be trained and applied with very little work, thanks to a standardisation effort of the ground-truth data format [16] and common deep-learning frameworks [17]. 
Reference data sets such as PubLayNet [6] and DocBank provide their data in the commonly accepted COCO format [16].", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.156982421875, 239.59246826171875, 559.1864624023438, 325.6906433105469], "page": 2, "span": [0, 500], "__ref_s3_data": null}], "text": "Lately, new types of ML models for document-layout analysis have emerged in the community [18-21]. These models do not approach the problem of layout analysis purely based on an image representation of the page, as computer vision methods do. Instead, they combine the text tokens and image representation of a page in order to obtain a segmentation. While the reported accuracies appear to be promising, a broadly accepted data format which links geometric and textual features has yet to establish.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.58740234375, 216.37100219726562, 477.8531799316406, 226.6800994873047], "page": 2, "span": [0, 23], "__ref_s3_data": null}], "text": "3 THE DOCLAYNET DATASET", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.11236572265625, 116.19312286376953, 559.7131958007812, 202.27523803710938], "page": 2, "span": [0, 522], "__ref_s3_data": null}], "text": "DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape of labeled, rectangular boundingboxes. We define 11 distinct labels for layout features, namely Caption , Footnote , Formula , List-item , Page-footer , Page-header , Picture , Section-header , Table , Text , and Title . 
Our reasoning for picking this particular label set is detailed in Section 4.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.34619140625, 83.59282684326172, 558.5303344726562, 114.41421508789062], "page": 2, "span": [0, 186], "__ref_s3_data": null}], "text": "In addition to open intellectual property constraints for the source documents, we required that the documents in DocLayNet adhere to a few conditions. Firstly, we kept scanned documents", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.4626579284668, 722.95458984375, 347.0511779785156, 732.11474609375], "page": 3, "span": [0, 71], "__ref_s3_data": null}], "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [365.31488037109375, 723.0569458007812, 558.807861328125, 731.9796142578125], "page": 3, "span": [0, 48], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [53.28777313232422, 536.294677734375, 294.0437316894531, 556.148193359375], "page": 3, "span": [0, 69], "__ref_s3_data": null}], "text": "Figure 2: Distribution of DocLayNet pages across document categories.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"prov": [{"bbox": [53.244232177734375, 424.931396484375, 294.5379943847656, 510.7526550292969], "page": 3, "span": [0, 513], "__ref_s3_data": null}], "text": "to a minimum, since they introduce difficulties in annotation (see Section 4). As a second condition, we focussed on medium to large documents ( > 10 pages) with technical content, dense in complex tables, figures, plots and captions. Such documents carry a lot of information value, but are often hard to analyse with high accuracy due to their challenging layouts. 
Counterexamples of documents not included in the dataset are receipts, invoices, hand-written documents or photographs showing \"text in the wild\".", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.10974884033203, 282.6438293457031, 295.5604553222656, 423.1407775878906], "page": 3, "span": [0, 810], "__ref_s3_data": null}], "text": "The pages in DocLayNet can be grouped into six distinct categories, namely Financial Reports , Manuals , Scientific Articles , Laws & Regulations , Patents and Government Tenders . Each document category was sourced from various repositories. For example, Financial Reports contain both free-style format annual reports 2 which expose company-specific, artistic layouts as well as the more formal SEC filings. The two largest categories ( Financial Reports and Manuals ) contain a large amount of free-style layouts in order to obtain maximum variability. In the other four categories, we boosted the variability by mixing documents from independent providers, such as different government websites or publishers. In Figure 2, we show the document categories contained in DocLayNet with their respective sizes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [52.8973388671875, 183.77932739257812, 295.5615539550781, 281.3227233886719], "page": 3, "span": [0, 535], "__ref_s3_data": null}], "text": "We did not control the document selection with regard to language. The vast majority of documents contained in DocLayNet (close to 95%) are published in English language. However, DocLayNet also contains a number of documents in other languages such as German (2.5%), French (1.0%) and Japanese (1.0%). 
While the document language has negligible impact on the performance of computer vision methods such as object detection and segmentation models, it might prove challenging for layout analysis methods which exploit textual features.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.209388732910156, 106.8985824584961, 295.56396484375, 182.471923828125], "page": 3, "span": [0, 413], "__ref_s3_data": null}], "text": "To ensure that future benchmarks in the document-layout analysis community can be easily compared, we have split up DocLayNet into pre-defined train-, test- and validation-sets. In this way, we can avoid spurious variations in the evaluation scores due to random splitting in train-, test- and validation-sets. We also ensured that less frequent labels are represented in train and test sets in equal proportions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.352603912353516, 83.35768127441406, 195.78997802734375, 91.47167205810547], "page": 3, "span": [0, 51], "__ref_s3_data": null}], "text": "$^{2}$e.g. AAPL from https://www.annualreports.com/", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [317.0691833496094, 630.5088500976562, 559.1918334960938, 705.8527221679688], "page": 3, "span": [0, 435], "__ref_s3_data": null}], "text": "Table 1 shows the overall frequency and distribution of the labels among the different sets. Importantly, we ensure that subsets are only split on full-document boundaries. This avoids that pages of the same document are spread over train, test and validation set, which can give an undesired evaluation advantage to models and lead to overestimation of their prediction accuracy. 
We will show the impact of this decision in Section 5.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.05938720703125, 520.8086547851562, 558.862060546875, 628.44580078125], "page": 3, "span": [0, 645], "__ref_s3_data": null}], "text": "In order to accommodate the different types of models currently in use by the community, we provide DocLayNet in an augmented COCO format [16]. This entails the standard COCO ground-truth file (in JSON format) with the associated page images (in PNG format, 1025 \u00d7 1025 pixels). Furthermore, custom fields have been added to each COCO record to specify document category, original document filename and page number. In addition, we also provide the original PDF pages, as well as sidecar files containing parsed PDF text and text-cell coordinates (in JSON). All additional files are linked to the primary page images by their matching filenames.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [316.88604736328125, 203.11082458496094, 559.7215576171875, 518.6715087890625], "page": 3, "span": [0, 1854], "__ref_s3_data": null}], "text": "Despite being cost-intense and far less scalable than automation, human annotation has several benefits over automated groundtruth generation. The first and most obvious reason to leverage human annotations is the freedom to annotate any type of document without requiring a programmatic source. For most PDF documents, the original source document is not available. The latter is not a hard constraint with human annotation, but it is for automated methods. A second reason to use human annotations is that the latter usually provide a more natural interpretation of the page layout. The human-interpreted layout can significantly deviate from the programmatic layout used in typesetting. For example, \"invisible\" tables might be used solely for aligning text paragraphs on columns. 
Such typesetting tricks might be interpreted by automated methods incorrectly as an actual table, while the human annotation will interpret it correctly as Text or other styles. The same applies to multi-line text elements, when authors decided to space them as \"invisible\" list elements without bullet symbols. A third reason to gather ground-truth through human annotation is to estimate a \"natural\" upper bound on the segmentation accuracy. As we will show in Section 4, certain documents featuring complex layouts can have different but equally acceptable layout interpretations. This natural upper bound for segmentation accuracy can be found by annotating the same pages multiple times by different people and evaluating the inter-annotator agreement. Such a baseline consistency evaluation is very useful to define expectations for a good target accuracy in trained deep neural network models and avoid overfitting (see Table 1). On the flip side, achieving high annotation consistency proved to be a key challenge in human annotation, as we outline in Section 4.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.66510009765625, 174.8409881591797, 470.2132568359375, 185.15008544921875], "page": 3, "span": [0, 21], "__ref_s3_data": null}], "text": "4 ANNOTATION CAMPAIGN", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.0245056152344, 85.38961791992188, 559.7138061523438, 160.93588256835938], "page": 3, "span": [0, 457], "__ref_s3_data": null}], "text": "The annotation campaign was carried out in four phases. In phase one, we identified and prepared the data sources for annotation. In phase two, we determined the class labels and how annotations should be done on the documents in order to obtain maximum consistency. The latter was guided by a detailed requirement analysis and exhaustive experiments. In phase three, we trained the annotation staff and performed exams for quality assurance. 
In phase four,", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.345272064208984, 723.0101318359375, 558.5491943359375, 732.1525268554688], "page": 4, "span": [0, 130], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [52.74671936035156, 676.2418212890625, 558.5100708007812, 707.6976928710938], "page": 4, "span": [0, 348], "__ref_s3_data": null}], "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [53.28383255004883, 185.58580017089844, 295.64874267578125, 237.99000549316406], "page": 4, "span": [0, 281], "__ref_s3_data": null}], "text": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"prov": [{"bbox": [52.954681396484375, 116.45683288574219, 294.3648681640625, 158.3203887939453], "page": 4, "span": [0, 231], "__ref_s3_data": null}], "text": "we distributed the annotation workload and performed continuous quality controls. Phase one and two required a small team of experts only. 
For phases three and four, a group of 40 dedicated annotators were assembled and supervised.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.368797302246094, 83.57982635498047, 295.5584411621094, 114.14925384521484], "page": 4, "span": [0, 193], "__ref_s3_data": null}], "text": "Phase 1: Data selection and preparation. Our inclusion criteria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.2582702636719, 416.48919677734375, 559.1853637695312, 481.0997619628906], "page": 4, "span": [0, 376], "__ref_s3_data": null}], "text": "include publication repositories such as arXiv$^{3}$, government offices, company websites as well as data directory services for financial reports and patents. Scanned documents were excluded wherever possible because they can be rotated or skewed. This would not allow us to perform annotation with rectangular bounding-boxes and therefore complicate the annotation process.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.0777587890625, 284.9187316894531, 559.7130737304688, 415.02398681640625], "page": 4, "span": [0, 746], "__ref_s3_data": null}], "text": "Preparation work included uploading and parsing the sourced PDF documents in the Corpus Conversion Service (CCS) [22], a cloud-native platform which provides a visual annotation interface and allows for dataset inspection and analysis. The annotation interface of CCS is shown in Figure 3. The desired balance of pages between the different document categories was achieved by selective subsampling of pages with certain desired properties. For example, we made sure to include the title page of each document and bias the remaining page selection to those with figures or tables. 
The latter was achieved by leveraging pre-trained object detection models from PubLayNet, which helped us estimate how many figures and tables a given page contains.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [316.9024963378906, 98.9438247680664, 559.7176513671875, 283.8972473144531], "page": 4, "span": [0, 1159], "__ref_s3_data": null}], "text": "Phase 2: Label selection and guideline. We reviewed the collected documents and identified the most common structural features they exhibit. This was achieved by identifying recurrent layout elements and lead us to the definition of 11 distinct class labels. These 11 class labels are Caption , Footnote , Formula , List-item , Pagefooter , Page-header , Picture , Section-header , Table , Text , and Title . Critical factors that were considered for the choice of these class labels were (1) the overall occurrence of the label, (2) the specificity of the label, (3) recognisability on a single page (i.e. no need for context from previous or next page) and (4) overall coverage of the page. Specificity ensures that the choice of label is not ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. 
Labels such as Author and Affiliation , as seen in DocBank, are often only distinguishable by discriminating on", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.7030029296875, 82.5821304321289, 369.40142822265625, 90.54422760009766], "page": 4, "span": [0, 24], "__ref_s3_data": null}], "text": "$^{3}$https://arxiv.org/", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [53.456207275390625, 723.0143432617188, 347.07373046875, 732.0245361328125], "page": 5, "span": [0, 71], "__ref_s3_data": null}], "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [365.2621765136719, 723.0404663085938, 558.9374389648438, 731.9317626953125], "page": 5, "span": [0, 48], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [53.24338912963867, 684.8170166015625, 294.04541015625, 705.5283813476562], "page": 5, "span": [0, 135], "__ref_s3_data": null}], "text": "the textual content of an element, which goes beyond visual layout recognition, in particular outside the Scientific Articles category.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.124725341796875, 542.8159790039062, 295.5592346191406, 683.8748168945312], "page": 5, "span": [0, 812], "__ref_s3_data": null}], "text": "At first sight, the task of visual document-layout interpretation appears intuitive enough to obtain plausible annotations in most cases. However, during early trial-runs in the core team, we observed many cases in which annotators use different annotation styles, especially for documents with challenging layouts. For example, if a figure is presented with subfigures, one annotator might draw a single figure bounding-box, while another might annotate each subfigure separately. 
The same applies for lists, where one might annotate all list items in one block or each list item separately. In essence, we observed that challenging layouts would be annotated in different but plausible ways. To illustrate this, we show in Figure 4 multiple examples of plausible but inconsistent annotations on the same pages.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.339271545410156, 455.16583251953125, 295.56005859375, 541.1383666992188], "page": 5, "span": [0, 465], "__ref_s3_data": null}], "text": "Obviously, this inconsistency in annotations is not desirable for datasets which are intended to be used for model training. To minimise these inconsistencies, we created a detailed annotation guideline. While perfect consistency across 40 annotation staff members is clearly not possible to achieve, we saw a huge improvement in annotation consistency after the introduction of our annotation guideline. A few selected, non-trivial highlights of the guideline are:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.39098358154297, 402.13092041015625, 294.42474365234375, 444.29510498046875], "page": 5, "span": [0, 202], "__ref_s3_data": null}], "text": "(1) Every list-item is an individual object instance with class label List-item . This definition is different from PubLayNet and DocBank, where all list-items are grouped together into one List object.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.31100463867188, 358.39984130859375, 295.563720703125, 400.2758483886719], "page": 5, "span": [0, 208], "__ref_s3_data": null}], "text": "(2) A List-item is a paragraph with hanging indentation. Singleline elements can qualify as List-item if the neighbour elements expose hanging indentation. 
Bullet or enumeration symbols are not a requirement.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.26787567138672, 336.4728698730469, 294.60943603515625, 356.2404479980469], "page": 5, "span": [0, 82], "__ref_s3_data": null}], "text": "(3) For every Caption , there must be exactly one corresponding Picture or Table .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.2632064819336, 314.5648193359375, 294.7487487792969, 334.179443359375], "page": 5, "span": [0, 70], "__ref_s3_data": null}], "text": "(4) Connected sub-pictures are grouped together in one Picture object.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.9930305480957, 303.59686279296875, 264.5057067871094, 312.8252868652344], "page": 5, "span": [0, 53], "__ref_s3_data": null}], "text": "(5) Formula numbers are included in a Formula object.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.07823181152344, 270.048095703125, 295.0240783691406, 301.5160827636719], "page": 5, "span": [0, 160], "__ref_s3_data": null}], "text": "(6) Emphasised text (e.g. in italic or bold) at the beginning of a paragraph is not considered a Section-header , unless it appears exclusively on its own line.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [52.994422912597656, 217.798828125, 295.5625305175781, 259.6097106933594], "page": 5, "span": [0, 221], "__ref_s3_data": null}], "text": "The complete annotation guideline is over 100 pages long and a detailed description is obviously out of scope for this paper. Nevertheless, it will be made publicly available alongside with DocLayNet for future reference.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.26631546020508, 86.24749755859375, 295.562255859375, 215.95584106445312], "page": 5, "span": [0, 792], "__ref_s3_data": null}], "text": "Phase 3: Training. 
After a first trial with a small group of people, we realised that providing the annotation guideline and a set of random practice pages did not yield the desired quality level for layout annotation. Therefore we prepared a subset of pages with two different complexity levels, each with a practice and an exam part. 974 pages were reference-annotated by one proficient core team member. Annotation staff were then given the task to annotate the same subsets (blinded from the reference). By comparing the annotations of each staff member with the reference annotations, we could quantify how closely their annotations matched the reference. Only after passing two exam levels with high annotation quality, staff were admitted into the production phase. Practice iterations", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [316.9992980957031, 287.86785888671875, 559.8057861328125, 318.7776794433594], "page": 5, "span": [0, 173], "__ref_s3_data": null}], "text": "Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"prov": [{"bbox": [316.8349914550781, 247.1688232421875, 558.204345703125, 266.81207275390625], "page": 5, "span": [0, 123], "__ref_s3_data": null}], "text": "were carried out over a timeframe of 12 weeks, after which 8 of the 40 initially allocated annotators did not pass the bar.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.00592041015625, 82.7375717163086, 559.7149047851562, 245.28392028808594], "page": 5, "span": [0, 987], "__ref_s3_data": null}], "text": "Phase 4: Production annotation. The previously selected 80K pages were annotated with the defined 11 class labels by 32 annotators. This production phase took around three months to complete. 
All annotations were created online through CCS, which visualises the programmatic PDF text-cells as an overlay on the page. The page annotation are obtained by drawing rectangular bounding-boxes, as shown in Figure 3. With regard to the annotation practices, we implemented a few constraints and capabilities on the tooling level. First, we only allow non-overlapping, vertically oriented, rectangular boxes. For the large majority of documents, this constraint was sufficient and it speeds up the annotation considerably in comparison with arbitrary segmentation shapes. Second, annotator staff were not able to see each other's annotations. This was enforced by design to avoid any bias in the annotation, which could skew the numbers of the inter-annotator agreement (see Table 1). We wanted", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.30706024169922, 722.92333984375, 558.4274291992188, 732.1127319335938], "page": 6, "span": [0, 130], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [52.78031539916992, 608.98291015625, 295.64874267578125, 705.8385620117188], "page": 6, "span": [0, 489], "__ref_s3_data": null}], "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. 
All models were initialised using pre-trained weights from the COCO 2017 dataset.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"prov": [{"bbox": [53.25688552856445, 214.2948760986328, 295.5561218261719, 421.4337158203125], "page": 6, "span": [0, 1252], "__ref_s3_data": null}], "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and Picture . For the latter, we instructed annotation staff to minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way to flag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in the final dataset. 
With all these measures in place, experienced annotation staff managed to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.62337875366211, 193.5609893798828, 147.4853515625, 203.87008666992188], "page": 6, "span": [0, 13], "__ref_s3_data": null}], "text": "5 EXPERIMENTS", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [53.076290130615234, 82.4822006225586, 295.4281005859375, 179.65382385253906], "page": 6, "span": [0, 584], "__ref_s3_data": null}], "text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.10931396484375, 449.6510009765625, 559.8057861328125, 513.7953491210938], "page": 6, "span": [0, 329], "__ref_s3_data": null}], "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. 
The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"prov": [{"bbox": [317.2011413574219, 388.6548156738281, 558.2041625976562, 408.8042297363281], "page": 6, "span": [0, 102], "__ref_s3_data": null}], "text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.0830078125, 311.45587158203125, 558.4364013671875, 386.632568359375], "page": 6, "span": [0, 397], "__ref_s3_data": null}], "text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.1941223144531, 284.5037841796875, 466.8532409667969, 295.42913818359375], "page": 6, "span": [0, 30], "__ref_s3_data": null}], "text": "Baselines for Object Detection", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.0144348144531, 85.2998275756836, 558.7822875976562, 280.8944396972656], "page": 6, "span": [0, 1146], "__ref_s3_data": null}], "text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of 1025 \u00d7 1025 pixels. For training, we only used one annotation in case of redundantly annotated pages. 
As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as Text , Table and Picture . This is not entirely surprising, as Text , Table and Picture are abundant and the most visually distinctive in a document.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.35094451904297, 722.9555053710938, 347.0172424316406, 732.038818359375], "page": 7, "span": [0, 71], "__ref_s3_data": null}], "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [365.1936950683594, 723.0802001953125, 558.7797241210938, 731.8773803710938], "page": 7, "span": [0, 48], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [52.8690299987793, 663.3739624023438, 295.6486511230469, 705.8510131835938], "page": 7, "span": [0, 205], "__ref_s3_data": null}], "text": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. 
The reduced label sets were obtained by either down-mapping or dropping labels.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"prov": [{"bbox": [53.446834564208984, 461.592041015625, 131.05624389648438, 472.6955871582031], "page": 7, "span": [0, 14], "__ref_s3_data": null}], "text": "Learning Curve", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [52.78499984741211, 262.38037109375, 295.558349609375, 457.72955322265625], "page": 7, "span": [0, 1157], "__ref_s3_data": null}], "text": "One of the fundamental questions related to any dataset is if it is \"large enough\". To answer this question for DocLayNet, we performed a data ablation study in which we evaluated a Mask R-CNN model trained on increasing fractions of the DocLayNet dataset. As can be seen in Figure 5, the mAP score rises sharply in the beginning and eventually levels out. To estimate the error-bar on the metrics, we ran the training five times on the entire data-set. This resulted in a 1% error-bar, depicted by the shaded area in Figure 5. In the inset of Figure 5, we show the exact same data-points, but with a logarithmic scale on the x-axis. As is expected, the mAP score increases linearly as a function of the data-size in the inset. The curve ultimately flattens out between the 80% and 100% mark, with the 80% mark falling within the error-bars of the 100% mark. This provides a good indication that the model would not improve significantly by yet increasing the data size. 
Rather, it would probably benefit more from improved data consistency (as discussed in Section 3), data augmentation methods [23], or the addition of more document categories and styles.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.37664794921875, 239.1809844970703, 164.3289794921875, 250.044677734375], "page": 7, "span": [0, 22], "__ref_s3_data": null}], "text": "Impact of Class Labels", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [53.06760787963867, 83.39567565917969, 295.5567932128906, 235.12689208984375], "page": 7, "span": [0, 910], "__ref_s3_data": null}], "text": "The choice and number of labels can have a significant effect on the overall model performance. Since PubLayNet, DocBank and DocLayNet all have different label sets, it is of particular interest to understand and quantify this influence of the label set on the model performance. We investigate this by either down-mapping labels into more common ones (e.g. Caption \u2192 Text ) or excluding them from the annotations entirely. Furthermore, it must be stressed that all mappings and exclusions were performed on the data before model training. In Table 3, we present the mAP scores for a Mask R-CNN R50 network on different label sets. Where a label is down-mapped, we show its corresponding label, otherwise it was excluded. We present three different label sets, with 6, 5 and 4 different labels respectively. The set of 5 labels contains the same labels as PubLayNet. However, due to the different definition of", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [316.9989929199219, 663.7767944335938, 559.8068237304688, 705.6134643554688], "page": 7, "span": [0, 189], "__ref_s3_data": null}], "text": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. 
Naive page-wise split will result in GLYPH 10% point improvement.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"prov": [{"bbox": [317.03326416015625, 375.50982666015625, 559.5849609375, 460.6855163574219], "page": 7, "span": [0, 469], "__ref_s3_data": null}], "text": "lists in PubLayNet (grouped list-items) versus DocLayNet (separate list-items), the label set of size 4 is the closest to PubLayNet, in the assumption that the List is down-mapped to Text in PubLayNet. The results in Table 3 show that the prediction accuracy on the remaining class labels does not change significantly when other classes are merged into them. The overall macro-average improves by around 5%, in particular when Page-footer and Page-header are excluded.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.4661865234375, 351.4896545410156, 549.860595703125, 362.8900451660156], "page": 7, "span": [0, 46], "__ref_s3_data": null}], "text": "Impact of Document Split in Train and Test Set", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [316.9546813964844, 196.5628204345703, 559.7138061523438, 348.10198974609375], "page": 7, "span": [0, 852], "__ref_s3_data": null}], "text": "Many documents in DocLayNet have a unique styling. In order to avoid overfitting on a particular style, we have split the train-, test- and validation-sets of DocLayNet on document boundaries, i.e. every document contributes pages to only one set. To the best of our knowledge, this was not considered in PubLayNet or DocBank. To quantify how this affects model performance, we trained and evaluated a Mask R-CNN R50 model on a modified dataset version. Here, the train-, test- and validation-sets were obtained by a randomised draw over the individual pages. 
As can be seen in Table 4, the difference in model performance is surprisingly large: pagewise splitting gains \u02dc 10% in mAP over the document-wise splitting. Thus, random page-wise splitting of DocLayNet can easily lead to accidental overestimation of model performance and should be avoided.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.3337707519531, 173.20875549316406, 418.5477600097656, 183.94322204589844], "page": 7, "span": [0, 18], "__ref_s3_data": null}], "text": "Dataset Comparison", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [316.7283935546875, 83.24566650390625, 559.1881713867188, 168.86700439453125], "page": 7, "span": [0, 521], "__ref_s3_data": null}], "text": "Throughout this paper, we claim that DocLayNet's wider variety of document layouts leads to more robust layout detection models. In Table 5, we provide evidence for that. We trained models on each of the available datasets (PubLayNet, DocBank and DocLayNet) and evaluated them on the test sets of the other datasets. Due to the different label sets and annotation styles, a direct comparison is not possible. Hence, we focussed on the common labels among the datasets. Between PubLayNet and DocLayNet, these are Picture ,", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.288330078125, 722.9171142578125, 558.4634399414062, 732.134033203125], "page": 8, "span": [0, 130], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [52.89757537841797, 641.85888671875, 295.648681640625, 705.7824096679688], "page": 8, "span": [0, 298], "__ref_s3_data": null}], "text": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. 
By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"prov": [{"bbox": [53.279537200927734, 348.85986328125, 294.6396789550781, 401.5162658691406], "page": 8, "span": [0, 295], "__ref_s3_data": null}], "text": "Section-header , Table and Text . Before training, we either mapped or excluded DocLayNet's other labels as specified in table 3, and also PubLayNet's List to Text . Note that the different clustering of lists (by list-element vs. whole list objects) naturally decreases the mAP score for Text .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.04817581176758, 205.98951721191406, 295.55908203125, 346.9607849121094], "page": 8, "span": [0, 793], "__ref_s3_data": null}], "text": "For comparison of DocBank with DocLayNet, we trained only on Picture and Table clusters of each dataset. We had to exclude Text because successive paragraphs are often grouped together into a single object in DocBank. This paragraph grouping is incompatible with the individual paragraphs of DocLayNet. As can be seen in Table 5, DocLayNet trained models yield better performance compared to the previous datasets. It is noteworthy that the models trained on PubLayNet and DocBank perform very well on their own test set, but have a much lower performance on the foreign datasets. While this also applies to DocLayNet, the difference is far less pronounced. 
Thus we conclude that DocLayNet trained models are overall more robust and will produce better results for challenging, unseen layouts.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.05388259887695, 176.33340454101562, 156.02235412597656, 187.29098510742188], "page": 8, "span": [0, 19], "__ref_s3_data": null}], "text": "Example Predictions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [53.07720184326172, 86.64982604980469, 295.5584411621094, 172.26492309570312], "page": 8, "span": [0, 481], "__ref_s3_data": null}], "text": "To conclude this section, we illustrate the quality of layout predictions one can expect from DocLayNet-trained models by providing a selection of examples without any further post-processing applied. Figure 6 shows selected layout predictions on pages from the test-set of DocLayNet. Results look decent in general across document categories, however one can also observe mistakes such as overlapping clusters of different classes, or entirely missing boxes due to low confidence.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.4961853027344, 695.8309936523438, 405.7296142578125, 706.4700317382812], "page": 8, "span": [0, 12], "__ref_s3_data": null}], "text": "6 CONCLUSION", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [317.0487976074219, 605.4117431640625, 559.7137451171875, 691.6207275390625], "page": 8, "span": [0, 507], "__ref_s3_data": null}], "text": "In this paper, we presented the DocLayNet dataset. It provides the document conversion and layout analysis research community a new and challenging dataset to improve and fine-tune novel ML methods on. In contrast to many other datasets, DocLayNet was created by human annotation in order to obtain reliable layout ground-truth on a wide variety of publication- and typesettingstyles. 
Including a large proportion of documents outside the scientific publishing domain adds significant value in this respect.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.03955078125, 506.7440185546875, 559.717041015625, 603.672607421875], "page": 8, "span": [0, 573], "__ref_s3_data": null}], "text": "From the dataset, we have derived on the one hand reference metrics for human performance on document-layout annotation (through double and triple annotations) and on the other hand evaluated the baseline performance of commonly used object detection methods. We also illustrated the impact of various dataset-related aspects on model performance through data-ablation experiments, both from a size and class-label perspective. Last but not least, we compared the accuracy of models trained on other public datasets and showed that DocLayNet trained models are more robust.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.1865234375, 474.2935791015625, 558.6325073242188, 505.4895324707031], "page": 8, "span": [0, 188], "__ref_s3_data": null}], "text": "To date, there is still a significant gap between human and ML accuracy on the layout interpretation task, and we hope that this work will inspire the research community to close that gap.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [317.4455871582031, 446.5990295410156, 387.5806579589844, 457.4013366699219], "page": 8, "span": [0, 10], "__ref_s3_data": null}], "text": "REFERENCES", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [320.5848693847656, 420.8371276855469, 559.0187377929688, 444.4063415527344], "page": 8, "span": [0, 191], "__ref_s3_data": null}], "text": "[1] Max G\u00f6bel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. 
In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.76806640625, 388.9571228027344, 559.7276000976562, 420.2254333496094], "page": 8, "span": [0, 279], "__ref_s3_data": null}], "text": "[2] Christian Clausner, Apostolos Antonacopoulos, and Stefan Pletschacher. Icdar2017 competition on recognition of documents with complex layouts rdcl2017. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 1404-1410, 2017.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.58111572265625, 364.88128662109375, 558.4269409179688, 388.028076171875], "page": 8, "span": [0, 213], "__ref_s3_data": null}], "text": "[3] Herv\u00e9 D\u00e9jean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), April 2019. http://sac.founderit.com/.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.72210693359375, 333.173095703125, 559.3787231445312, 364.17962646484375], "page": 8, "span": [0, 251], "__ref_s3_data": null}], "text": "[4] Antonio Jimeno Yepes, Peter Zhong, and Douglas Burdick. Competition on scientific literature parsing. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 605-617. LNCS 12824, SpringerVerlag, sep 2021.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.47723388671875, 300.9960021972656, 559.2555541992188, 332.2057800292969], "page": 8, "span": [0, 261], "__ref_s3_data": null}], "text": "[5] Logan Markewich, Hao Zhang, Yubin Xing, Navid Lambert-Shirzad, Jiang Zhexin, Roy Lee, Zhi Li, and Seok-Bum Ko. Segmentation for document layout analysis: not dead yet. 
International Journal on Document Analysis and Recognition (IJDAR) , pages 1-11, 01 2022.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.7210998535156, 277.3751220703125, 558.6044921875, 300.1542053222656], "page": 8, "span": [0, 235], "__ref_s3_data": null}], "text": "[6] Xu Zhong, Jianbin Tang, and Antonio Jimeno-Yepes. Publaynet: Largest dataset ever for document layout analysis. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 1015-1022, sep 2019.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.7048034667969, 237.53111267089844, 559.0962524414062, 276.57550048828125], "page": 8, "span": [0, 316], "__ref_s3_data": null}], "text": "[7] Minghao Li, Yiheng Xu, Lei Cui, Shaohan Huang, Furu Wei, Zhoujun Li, and Ming Zhou. Docbank: A benchmark dataset for document layout analysis. In Proceedings of the 28th International Conference on Computational Linguistics , COLING, pages 949-960. International Committee on Computational Linguistics, dec 2020.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.6175537109375, 213.6141357421875, 558.9022216796875, 236.84490966796875], "page": 8, "span": [0, 172], "__ref_s3_data": null}], "text": "[8] Riaz Ahmad, Muhammad Tanvir Afzal, and M. Qadir. Information extraction from pdf sources based on rule-based system using integrated formats. In SemWebEval@ESWC , 2016.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [320.695556640625, 181.74110412597656, 559.2744750976562, 212.77767944335938], "page": 8, "span": [0, 271], "__ref_s3_data": null}], "text": "[9] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE Conference on Computer Vision and Pattern Recognition , CVPR, pages 580-587. 
IEEE Computer Society, jun 2014.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.74908447265625, 165.5072479248047, 558.8585205078125, 181.0753173828125], "page": 8, "span": [0, 149], "__ref_s3_data": null}], "text": "[10] Ross B. Girshick. Fast R-CNN. In 2015 IEEE International Conference on Computer Vision , ICCV, pages 1440-1448. IEEE Computer Society, dec 2015.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.71527099609375, 141.8831329345703, 558.4170532226562, 164.63047790527344], "page": 8, "span": [0, 227], "__ref_s3_data": null}], "text": "[11] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence , 39(6):1137-1149, 2017.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.5010986328125, 117.60646057128906, 559.278076171875, 141.50643920898438], "page": 8, "span": [0, 192], "__ref_s3_data": null}], "text": "[12] Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross B. Girshick. Mask R-CNN. In IEEE International Conference on Computer Vision , ICCV, pages 2980-2988. 
IEEE Computer Society, Oct 2017.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.4837341308594, 86.09910583496094, 559.0487670898438, 116.94155883789062], "page": 8, "span": [0, 305], "__ref_s3_data": null}], "text": "[13] Glenn Jocher, Alex Stoken, Ayush Chaurasia, Jirka Borovec, NanoCode012, TaoXie, Yonghye Kwon, Kalen Michael, Liu Changyu, Jiacong Fang, Abhiram V, Laughing, tkianai, yxNONG, Piotr Skalski, Adam Hogan, Jebastin Nadar, imyhxy, Lorenzo Mammana, Alex Wang, Cristi Fati, Diego Montes, Jan Hajek, Laurentiu", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.55940246582031, 722.9329223632812, 347.0838623046875, 731.9924926757812], "page": 9, "span": [0, 71], "__ref_s3_data": null}], "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [365.1275329589844, 723.0497436523438, 558.905029296875, 731.96435546875], "page": 9, "span": [0, 48], "__ref_s3_data": null}], "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [53.39582824707031, 285.65704345703125, 559.807861328125, 328.056396484375], "page": 9, "span": [0, 386], "__ref_s3_data": null}], "text": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. 
(F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"prov": [{"bbox": [68.69137573242188, 242.22409057617188, 295.22406005859375, 265.4314270019531], "page": 9, "span": [0, 195], "__ref_s3_data": null}], "text": "Diaconu, Mai Thanh Minh, Marc, albinxavi, fatih, oleg, and wanghao yang. ultralytics/yolov5: v6.0 - yolov5n nano models, roboflow integration, tensorflow export, opencv dnn support, October 2021.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [53.56020736694336, 218.56314086914062, 295.12176513671875, 241.63282775878906], "page": 9, "span": [0, 190], "__ref_s3_data": null}], "text": "[14] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to-end object detection with transformers. CoRR , abs/2005.12872, 2020.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.61275863647461, 202.62213134765625, 294.3653869628906, 217.57615661621094], "page": 9, "span": [0, 132], "__ref_s3_data": null}], "text": "[15] Mingxing Tan, Ruoming Pang, and Quoc V. Le. Efficientdet: Scalable and efficient object detection. CoRR , abs/1911.09070, 2019.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.668941497802734, 178.71910095214844, 295.2226257324219, 201.57443237304688], "page": 9, "span": [0, 219], "__ref_s3_data": null}], "text": "[16] Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 
Microsoft COCO: common objects in context, 2014.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.54263687133789, 162.77911376953125, 295.1200866699219, 178.3345947265625], "page": 9, "span": [0, 100], "__ref_s3_data": null}], "text": "[17] Yuxin Wu, Alexander Kirillov, Francisco Massa, Wan-Yen Lo, and Ross Girshick. Detectron2, 2019.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.569610595703125, 122.92810821533203, 294.8847351074219, 162.23497009277344], "page": 9, "span": [0, 339], "__ref_s3_data": null}], "text": "[18] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter W. J. Staar. Robust pdf document conversion using recurrent neural networks. In Proceedings of the 35th Conference on Artificial Intelligence , AAAI, pages 1513715145, feb 2021.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [53.4610595703125, 82.67352294921875, 295.22174072265625, 122.19474029541016], "page": 9, "span": [0, 336], "__ref_s3_data": null}], "text": "[19] Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. Layoutlm: Pre-training of text and layout for document image understanding. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 1192-1200, New York, USA, 2020. Association for Computing Machinery.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.6278076171875, 249.62921142578125, 559.0263671875, 265.5798645019531], "page": 9, "span": [0, 153], "__ref_s3_data": null}], "text": "[20] Shoubin Li, Xuyan Ma, Shuaiqun Pan, Jun Hu, Lin Shi, and Qing Wang. 
Vtlayout: Fusion of visual and text features for document layout analysis, 2021.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.53033447265625, 226.54010009765625, 559.0158081054688, 249.28826904296875], "page": 9, "span": [0, 188], "__ref_s3_data": null}], "text": "[21] Peng Zhang, Can Li, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Fei Wu. Vsr: A unified framework for document layout analysis combining vision, semantics and relations, 2021.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.6616516113281, 194.28546142578125, 559.275390625, 225.54457092285156], "page": 9, "span": [0, 290], "__ref_s3_data": null}], "text": "[22] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 774-782. ACM, 2018.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [317.65606689453125, 178.71212768554688, 559.3782958984375, 193.30506896972656], "page": 9, "span": [0, 138], "__ref_s3_data": null}], "text": "[23] Connor Shorten and Taghi M. Khoshgoftaar. A survey on image data augmentation for deep learning. 
Journal of Big Data , 6(1):60, 2019.", "type": "paragraph", "name": "List-item", "font": null}], "figures": [{"prov": [{"bbox": [324.3027038574219, 266.1221618652344, 554.91796875, 543.5838623046875], "page": 1, "span": [0, 84], "__ref_s3_data": null}], "text": "Figure 1: Four examples of complex page layouts across different document categories", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [88.16680145263672, 569.726806640625, 264.2818298339844, 698.8894653320312], "page": 3, "span": [0, 69], "__ref_s3_data": null}], "text": "Figure 2: Distribution of DocLayNet pages across document categories.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [53.179771423339844, 250.80191040039062, 295.3565368652344, 481.6382141113281], "page": 4, "span": [0, 281], "__ref_s3_data": null}], "text": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [315.8857116699219, 331.43994140625, 559.6527709960938, 707.0224609375], "page": 5, "span": [0, 173], "__ref_s3_data": null}], "text": "Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [322.7086486816406, 531.372314453125, 553.7246704101562, 701.6975708007812], "page": 6, "span": [0, 329], "__ref_s3_data": null}], "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. 
The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [53.59891891479492, 343.73516845703125, 554.9424438476562, 708.443115234375], "page": 9, "span": [0, 386], "__ref_s3_data": null}], "text": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes.", "type": "figure", "bounding-box": null}], "tables": [{"prov": [{"bbox": [98.96420288085938, 498.30108642578125, 512.7739868164062, 654.1231689453125], "page": 4, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. 
The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.", "type": "table", "#-cols": 12, "#-rows": 14, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [233.94400024414062, 643.40185546875, 270.042724609375, 651.7764892578125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "% of Total", "type": "col_header", "col": 4, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 6, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": 
false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 7, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 8, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 9, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 10, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [329.04998779296875, 643.40185546875, 483.39764404296875, 651.7764892578125], "spans": [[0, 5], [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11]], "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "type": "col_header", "col": 11, "col-header": false, "col-span": [5, 12], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [104.82499694824219, 632.4428100585938, 141.7127685546875, 640.8174438476562], "spans": [[1, 0]], "text": "class label", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [175.94700622558594, 
632.4428100585938, 198.7126922607422, 640.8174438476562], "spans": [[1, 1]], "text": "Count", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [213.7949981689453, 632.4428100585938, 233.69143676757812, 640.8174438476562], "spans": [[1, 2]], "text": "Train", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [249.37367248535156, 632.4428100585938, 264.5, 640.8174438476562], "spans": [[1, 3]], "text": "Test", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [283.5356750488281, 632.4428100585938, 295.3085632324219, 640.8174438476562], "spans": [[1, 4]], "text": "Val", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [314.0150146484375, 632.4428100585938, 324.9809265136719, 640.8174438476562], "spans": [[1, 5]], "text": "All", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [343.0123596191406, 632.4428100585938, 354.6507568359375, 640.8174438476562], "spans": [[1, 6]], "text": "Fin", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [367.84033203125, 632.4428100585938, 384.3205871582031, 640.8174438476562], "spans": [[1, 7]], "text": "Man", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [407.5435791015625, 632.4428100585938, 418.1597900390625, 640.8174438476562], "spans": [[1, 8]], "text": "Sci", "type": "col_header", "col": 8, "col-header": false, "col-span": [8, 9], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [432.2998046875, 632.4428100585938, 447.8296203613281, 
640.8174438476562], "spans": [[1, 9]], "text": "Law", "type": "col_header", "col": 9, "col-header": false, "col-span": [9, 10], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [465.7265625, 632.4428100585938, 477.5084228515625, 640.8174438476562], "spans": [[1, 10]], "text": "Pat", "type": "col_header", "col": 10, "col-header": false, "col-span": [10, 11], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [493.52239990234375, 632.4428100585938, 507.17822265625, 640.8174438476562], "spans": [[1, 11]], "text": "Ten", "type": "col_header", "col": 11, "col-header": false, "col-span": [11, 12], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [104.82499694824219, 621.0858154296875, 134.01063537597656, 629.46044921875], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [177.86599731445312, 621.0858154296875, 198.71287536621094, 629.46044921875], "spans": [[2, 1]], "text": "22524", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [219.21099853515625, 621.0858154296875, 233.69174194335938, 629.46044921875], "spans": [[2, 2]], "text": "2.04", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [250.01956176757812, 621.0858154296875, 264.50030517578125, 629.46044921875], "spans": [[2, 3]], "text": "1.77", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [280.828125, 621.0858154296875, 295.3088684082031, 629.46044921875], "spans": [[2, 4]], "text": "2.32", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [305.27301025390625, 621.0858154296875, 324.9811706542969, 629.46044921875], "spans": [[2, 5]], "text": 
"84-89", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [334.9428405761719, 621.0858154296875, 354.6510009765625, 629.46044921875], "spans": [[2, 6]], "text": "40-61", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [364.6126708984375, 621.0858154296875, 384.3208312988281, 629.46044921875], "spans": [[2, 7]], "text": "86-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [398.4518737792969, 621.0858154296875, 418.1600341796875, 629.46044921875], "spans": [[2, 8]], "text": "94-99", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [428.1217041015625, 621.0858154296875, 447.8298645019531, 629.46044921875], "spans": [[2, 9]], "text": "95-99", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [457.8005065917969, 621.0858154296875, 477.5086669921875, 629.46044921875], "spans": [[2, 10]], "text": "69-78", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [495.32489013671875, 621.0858154296875, 507.178466796875, 629.46044921875], "spans": [[2, 11]], "text": "n/a", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [104.82499694824219, 610.1268310546875, 137.3282012939453, 618.50146484375], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [182.03500366210938, 610.1268310546875, 198.71250915527344, 618.50146484375], "spans": [[3, 1]], "text": "6318", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [219.21099853515625, 610.1268310546875, 233.69174194335938, 618.50146484375], "spans": [[3, 2]], "text": "0.60", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [250.01956176757812, 610.1268310546875, 264.50030517578125, 618.50146484375], "spans": [[3, 3]], "text": "0.31", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [280.828125, 610.1268310546875, 295.3088684082031, 618.50146484375], "spans": [[3, 4]], "text": "0.58", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [305.27301025390625, 610.1268310546875, 324.9811706542969, 618.50146484375], "spans": [[3, 5]], "text": "83-91", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [342.7973937988281, 610.1268310546875, 354.6509704589844, 618.50146484375], "spans": [[3, 6]], "text": "n/a", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [371.8126525878906, 610.1268310546875, 384.3207702636719, 618.50146484375], "spans": [[3, 7]], "text": "100", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [398.4518127441406, 610.1268310546875, 418.15997314453125, 618.50146484375], "spans": [[3, 8]], "text": "62-88", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [428.12164306640625, 610.1268310546875, 447.8298034667969, 618.50146484375], "spans": [[3, 9]], "text": "85-94", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": 
[465.6549987792969, 610.1268310546875, 477.5085754394531, 618.50146484375], "spans": [[3, 10]], "text": "n/a", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [487.4702453613281, 610.1268310546875, 507.17840576171875, 618.50146484375], "spans": [[3, 11]], "text": "82-97", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [104.82499694824219, 599.1678466796875, 135.33766174316406, 607.54248046875], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [177.86599731445312, 599.1678466796875, 198.71287536621094, 607.54248046875], "spans": [[4, 1]], "text": "25027", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [219.21099853515625, 599.1678466796875, 233.69174194335938, 607.54248046875], "spans": [[4, 2]], "text": "2.25", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [250.01956176757812, 599.1678466796875, 264.50030517578125, 607.54248046875], "spans": [[4, 3]], "text": "1.90", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [280.828125, 599.1678466796875, 295.3088684082031, 607.54248046875], "spans": [[4, 4]], "text": "2.96", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [305.27301025390625, 599.1678466796875, 324.9811706542969, 607.54248046875], "spans": [[4, 5]], "text": "83-85", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [342.7973937988281, 599.1678466796875, 354.6509704589844, 
607.54248046875], "spans": [[4, 6]], "text": "n/a", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [372.4671936035156, 599.1678466796875, 384.3207702636719, 607.54248046875], "spans": [[4, 7]], "text": "n/a", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [398.4518127441406, 599.1678466796875, 418.15997314453125, 607.54248046875], "spans": [[4, 8]], "text": "84-87", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [428.12164306640625, 599.1678466796875, 447.8298034667969, 607.54248046875], "spans": [[4, 9]], "text": "86-96", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [465.6549987792969, 599.1678466796875, 477.5085754394531, 607.54248046875], "spans": [[4, 10]], "text": "n/a", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [495.3247985839844, 599.1678466796875, 507.1783752441406, 607.54248046875], "spans": [[4, 11]], "text": "n/a", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [104.82499694824219, 588.2088012695312, 137.7047882080078, 596.5834350585938], "spans": [[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [173.69700622558594, 588.2088012695312, 198.7132568359375, 596.5834350585938], "spans": [[5, 1]], "text": "185660", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [215.04200744628906, 588.2088012695312, 233.69212341308594, 596.5834350585938], "spans": [[5, 2]], "text": "17.19", 
"type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [245.85055541992188, 588.2088012695312, 264.50067138671875, 596.5834350585938], "spans": [[5, 3]], "text": "13.34", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [276.65911865234375, 588.2088012695312, 295.3092346191406, 596.5834350585938], "spans": [[5, 4]], "text": "15.82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [305.27301025390625, 588.2088012695312, 324.9811706542969, 596.5834350585938], "spans": [[5, 5]], "text": "87-88", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [334.9428405761719, 588.2088012695312, 354.6510009765625, 596.5834350585938], "spans": [[5, 6]], "text": "74-83", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [364.6126708984375, 588.2088012695312, 384.3208312988281, 596.5834350585938], "spans": [[5, 7]], "text": "90-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [398.4518737792969, 588.2088012695312, 418.1600341796875, 596.5834350585938], "spans": [[5, 8]], "text": "97-97", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [428.1217041015625, 588.2088012695312, 447.8298645019531, 596.5834350585938], "spans": [[5, 9]], "text": "81-85", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [457.8005065917969, 588.2088012695312, 477.5086669921875, 596.5834350585938], "spans": [[5, 10]], "text": "75-88", "type": "body", "col": 10, "col-header": false, 
"col-span": [10, 11], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [487.4703369140625, 588.2088012695312, 507.1784973144531, 596.5834350585938], "spans": [[5, 11]], "text": "93-95", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [104.82499694824219, 577.2498168945312, 147.3526153564453, 585.6244506835938], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [177.86599731445312, 577.2498168945312, 198.71287536621094, 585.6244506835938], "spans": [[6, 1]], "text": "70878", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [219.21099853515625, 577.2498168945312, 233.69174194335938, 585.6244506835938], "spans": [[6, 2]], "text": "6.51", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [250.01956176757812, 577.2498168945312, 264.50030517578125, 585.6244506835938], "spans": [[6, 3]], "text": "5.58", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [280.828125, 577.2498168945312, 295.3088684082031, 585.6244506835938], "spans": [[6, 4]], "text": "6.00", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [305.27301025390625, 577.2498168945312, 324.9811706542969, 585.6244506835938], "spans": [[6, 5]], "text": "93-94", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [334.9428405761719, 577.2498168945312, 354.6510009765625, 585.6244506835938], "spans": [[6, 6]], "text": "88-90", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": 
false, "row-span": [6, 7]}, {"bbox": [364.6126708984375, 577.2498168945312, 384.3208312988281, 585.6244506835938], "spans": [[6, 7]], "text": "95-96", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [405.6518859863281, 577.2498168945312, 418.1600036621094, 585.6244506835938], "spans": [[6, 8]], "text": "100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [428.1216735839844, 577.2498168945312, 447.829833984375, 585.6244506835938], "spans": [[6, 9]], "text": "92-97", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [465.00048828125, 577.2498168945312, 477.50860595703125, 585.6244506835938], "spans": [[6, 10]], "text": "100", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [487.47027587890625, 577.2498168945312, 507.1784362792969, 585.6244506835938], "spans": [[6, 11]], "text": "96-98", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [104.82499694824219, 566.2908325195312, 150.10531616210938, 574.6654663085938], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [177.86599731445312, 566.2908325195312, 198.71287536621094, 574.6654663085938], "spans": [[7, 1]], "text": "58022", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [219.21099853515625, 566.2908325195312, 233.69174194335938, 574.6654663085938], "spans": [[7, 2]], "text": "5.10", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": 
[250.01956176757812, 566.2908325195312, 264.50030517578125, 574.6654663085938], "spans": [[7, 3]], "text": "6.70", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [280.828125, 566.2908325195312, 295.3088684082031, 574.6654663085938], "spans": [[7, 4]], "text": "5.06", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [305.27301025390625, 566.2908325195312, 324.9811706542969, 574.6654663085938], "spans": [[7, 5]], "text": "85-89", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [334.9428405761719, 566.2908325195312, 354.6510009765625, 574.6654663085938], "spans": [[7, 6]], "text": "66-76", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [364.6126708984375, 566.2908325195312, 384.3208312988281, 574.6654663085938], "spans": [[7, 7]], "text": "90-94", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [394.2825012207031, 566.2908325195312, 418.1600341796875, 574.6654663085938], "spans": [[7, 8]], "text": "98-100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [428.1217041015625, 566.2908325195312, 447.8298645019531, 574.6654663085938], "spans": [[7, 9]], "text": "91-92", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [457.8005065917969, 566.2908325195312, 477.5086669921875, 574.6654663085938], "spans": [[7, 10]], "text": "97-99", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [487.4703369140625, 566.2908325195312, 507.1784973144531, 
574.6654663085938], "spans": [[7, 11]], "text": "81-86", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [104.82499694824219, 555.3318481445312, 130.80963134765625, 563.7064819335938], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [177.86599731445312, 555.3318481445312, 198.71287536621094, 563.7064819335938], "spans": [[8, 1]], "text": "45976", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [219.21099853515625, 555.3318481445312, 233.69174194335938, 563.7064819335938], "spans": [[8, 2]], "text": "4.21", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [250.01956176757812, 555.3318481445312, 264.50030517578125, 563.7064819335938], "spans": [[8, 3]], "text": "2.78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [280.828125, 555.3318481445312, 295.3088684082031, 563.7064819335938], "spans": [[8, 4]], "text": "5.31", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [305.27301025390625, 555.3318481445312, 324.9811706542969, 563.7064819335938], "spans": [[8, 5]], "text": "69-71", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [334.9428405761719, 555.3318481445312, 354.6510009765625, 563.7064819335938], "spans": [[8, 6]], "text": "56-59", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [364.6126708984375, 555.3318481445312, 384.3208312988281, 563.7064819335938], "spans": [[8, 7]], "text": 
"82-86", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [398.4518737792969, 555.3318481445312, 418.1600341796875, 563.7064819335938], "spans": [[8, 8]], "text": "69-82", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [428.1217041015625, 555.3318481445312, 447.8298645019531, 563.7064819335938], "spans": [[8, 9]], "text": "80-95", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [457.8005065917969, 555.3318481445312, 477.5086669921875, 563.7064819335938], "spans": [[8, 10]], "text": "66-71", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [487.4703369140625, 555.3318481445312, 507.1784973144531, 563.7064819335938], "spans": [[8, 11]], "text": "59-76", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [104.82499694824219, 544.372802734375, 159.5648651123047, 552.7474365234375], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [173.69700622558594, 544.372802734375, 198.7132568359375, 552.7474365234375], "spans": [[9, 1]], "text": "142884", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [215.04200744628906, 544.372802734375, 233.69212341308594, 552.7474365234375], "spans": [[9, 2]], "text": "12.60", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [245.85055541992188, 544.372802734375, 264.50067138671875, 552.7474365234375], "spans": [[9, 3]], "text": "15.77", "type": "body", "col": 
3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [276.65911865234375, 544.372802734375, 295.3092346191406, 552.7474365234375], "spans": [[9, 4]], "text": "12.85", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [305.27301025390625, 544.372802734375, 324.9811706542969, 552.7474365234375], "spans": [[9, 5]], "text": "83-84", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [334.9428405761719, 544.372802734375, 354.6510009765625, 552.7474365234375], "spans": [[9, 6]], "text": "76-81", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [364.6126708984375, 544.372802734375, 384.3208312988281, 552.7474365234375], "spans": [[9, 7]], "text": "90-92", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [398.4518737792969, 544.372802734375, 418.1600341796875, 552.7474365234375], "spans": [[9, 8]], "text": "94-95", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [428.1217041015625, 544.372802734375, 447.8298645019531, 552.7474365234375], "spans": [[9, 9]], "text": "87-94", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [457.8005065917969, 544.372802734375, 477.5086669921875, 552.7474365234375], "spans": [[9, 10]], "text": "69-73", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [487.4703369140625, 544.372802734375, 507.1784973144531, 552.7474365234375], "spans": [[9, 11]], "text": "78-86", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 
9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [104.82499694824219, 533.413818359375, 124.63176727294922, 541.7884521484375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [177.86599731445312, 533.413818359375, 198.71287536621094, 541.7884521484375], "spans": [[10, 1]], "text": "34733", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [219.21099853515625, 533.413818359375, 233.69174194335938, 541.7884521484375], "spans": [[10, 2]], "text": "3.20", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [250.01956176757812, 533.413818359375, 264.50030517578125, 541.7884521484375], "spans": [[10, 3]], "text": "2.27", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [280.828125, 533.413818359375, 295.3088684082031, 541.7884521484375], "spans": [[10, 4]], "text": "3.60", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [305.27301025390625, 533.413818359375, 324.9811706542969, 541.7884521484375], "spans": [[10, 5]], "text": "77-81", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [334.9428405761719, 533.413818359375, 354.6510009765625, 541.7884521484375], "spans": [[10, 6]], "text": "75-80", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [364.6126708984375, 533.413818359375, 384.3208312988281, 541.7884521484375], "spans": [[10, 7]], "text": "83-86", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 10, "row-header": false, 
"row-span": [10, 11]}, {"bbox": [398.4518737792969, 533.413818359375, 418.1600341796875, 541.7884521484375], "spans": [[10, 8]], "text": "98-99", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [428.1217041015625, 533.413818359375, 447.8298645019531, 541.7884521484375], "spans": [[10, 9]], "text": "58-80", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [457.8005065917969, 533.413818359375, 477.5086669921875, 541.7884521484375], "spans": [[10, 10]], "text": "79-84", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [487.4703369140625, 533.413818359375, 507.1784973144531, 541.7884521484375], "spans": [[10, 11]], "text": "70-85", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [104.82499694824219, 522.455810546875, 120.78518676757812, 530.8304443359375], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [173.69700622558594, 522.455810546875, 198.7132568359375, 530.8304443359375], "spans": [[11, 1]], "text": "510377", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [215.04200744628906, 522.455810546875, 233.69212341308594, 530.8304443359375], "spans": [[11, 2]], "text": "45.82", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [245.85055541992188, 522.455810546875, 264.50067138671875, 530.8304443359375], "spans": [[11, 3]], "text": "49.28", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": 
[11, 12]}, {"bbox": [276.65911865234375, 522.455810546875, 295.3092346191406, 530.8304443359375], "spans": [[11, 4]], "text": "45.00", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [305.27301025390625, 522.455810546875, 324.9811706542969, 530.8304443359375], "spans": [[11, 5]], "text": "84-86", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [334.9428405761719, 522.455810546875, 354.6510009765625, 530.8304443359375], "spans": [[11, 6]], "text": "81-86", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [364.6126708984375, 522.455810546875, 384.3208312988281, 530.8304443359375], "spans": [[11, 7]], "text": "88-93", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [398.4518737792969, 522.455810546875, 418.1600341796875, 530.8304443359375], "spans": [[11, 8]], "text": "89-93", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [428.1217041015625, 522.455810546875, 447.8298645019531, 530.8304443359375], "spans": [[11, 9]], "text": "87-92", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [457.8005065917969, 522.455810546875, 477.5086669921875, 530.8304443359375], "spans": [[11, 10]], "text": "71-79", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [487.4703369140625, 522.455810546875, 507.1784973144531, 530.8304443359375], "spans": [[11, 11]], "text": "87-95", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": 
[104.82499694824219, 511.496826171875, 121.81632995605469, 519.8714599609375], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [182.03500366210938, 511.496826171875, 198.71250915527344, 519.8714599609375], "spans": [[12, 1]], "text": "5071", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [219.21099853515625, 511.496826171875, 233.69174194335938, 519.8714599609375], "spans": [[12, 2]], "text": "0.47", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [250.01956176757812, 511.496826171875, 264.50030517578125, 519.8714599609375], "spans": [[12, 3]], "text": "0.30", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [280.828125, 511.496826171875, 295.3088684082031, 519.8714599609375], "spans": [[12, 4]], "text": "0.50", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [305.27301025390625, 511.496826171875, 324.9811706542969, 519.8714599609375], "spans": [[12, 5]], "text": "60-72", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [334.9428405761719, 511.496826171875, 354.6510009765625, 519.8714599609375], "spans": [[12, 6]], "text": "24-63", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [364.6126708984375, 511.496826171875, 384.3208312988281, 519.8714599609375], "spans": [[12, 7]], "text": "50-63", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [394.2825012207031, 
511.496826171875, 418.1600341796875, 519.8714599609375], "spans": [[12, 8]], "text": "94-100", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [428.1217041015625, 511.496826171875, 447.8298645019531, 519.8714599609375], "spans": [[12, 9]], "text": "82-96", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [457.8005065917969, 511.496826171875, 477.5086669921875, 519.8714599609375], "spans": [[12, 10]], "text": "68-79", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [487.4703369140625, 511.496826171875, 507.1784973144531, 519.8714599609375], "spans": [[12, 11]], "text": "24-56", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [104.82499694824219, 500.1388244628906, 123.43028259277344, 508.5134582519531], "spans": [[13, 0]], "text": "Total", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [169.52699279785156, 500.1388244628906, 198.71263122558594, 508.5134582519531], "spans": [[13, 1]], "text": "1107470", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [208.6750030517578, 500.1388244628906, 233.69125366210938, 508.5134582519531], "spans": [[13, 2]], "text": "941123", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [243.65292358398438, 500.1388244628906, 264.49981689453125, 508.5134582519531], "spans": [[13, 3]], "text": "99816", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [274.46148681640625, 
500.1388244628906, 295.3083801269531, 508.5134582519531], "spans": [[13, 4]], "text": "66531", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [305.27301025390625, 500.1388244628906, 324.9811706542969, 508.5134582519531], "spans": [[13, 5]], "text": "82-83", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [334.9428405761719, 500.1388244628906, 354.6510009765625, 508.5134582519531], "spans": [[13, 6]], "text": "71-74", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [364.6126708984375, 500.1388244628906, 384.3208312988281, 508.5134582519531], "spans": [[13, 7]], "text": "79-81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [398.4518737792969, 500.1388244628906, 418.1600341796875, 508.5134582519531], "spans": [[13, 8]], "text": "89-94", "type": "body", "col": 8, "col-header": false, "col-span": [8, 9], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [428.1217041015625, 500.1388244628906, 447.8298645019531, 508.5134582519531], "spans": [[13, 9]], "text": "86-91", "type": "body", "col": 9, "col-header": false, "col-span": [9, 10], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [457.8005065917969, 500.1388244628906, 477.5086669921875, 508.5134582519531], "spans": [[13, 10]], "text": "71-76", "type": "body", "col": 10, "col-header": false, "col-span": [10, 11], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [487.4703369140625, 500.1388244628906, 507.1784973144531, 508.5134582519531], "spans": [[13, 11]], "text": "68-85", "type": "body", "col": 11, "col-header": false, "col-span": [11, 12], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null, "bounding-box": null}, {"prov": 
[{"bbox": [61.93328094482422, 440.30438232421875, 285.75616455078125, 596.587158203125], "page": 6, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", "type": "table", "#-cols": 6, "#-rows": 14, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [132.36500549316406, 585.65185546875, 157.99098205566406, 594.0264892578125], "spans": [[0, 1], [1, 1]], "text": "human", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [173.5050048828125, 585.65185546875, 204.618408203125, 594.0264892578125], "spans": [[0, 2], [0, 3]], "text": "MRCNN", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [173.5050048828125, 585.65185546875, 204.618408203125, 594.0264892578125], "spans": [[0, 2], [0, 3]], "text": "MRCNN", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [220.13027954101562, 585.65185546875, 248.069580078125, 594.0264892578125], "spans": [[0, 4]], "text": "FRCNN", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [258.03125, 585.65185546875, 280.1782531738281, 594.0264892578125], "spans": [[0, 5]], "text": "YOLO", "type": 
"col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [132.36500549316406, 585.65185546875, 157.99098205566406, 594.0264892578125], "spans": [[0, 1], [1, 1]], "text": "human", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [168.39300537109375, 574.6928100585938, 181.9950408935547, 583.0674438476562], "spans": [[1, 2]], "text": "R50", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.39605712890625, 574.6928100585938, 210.16746520996094, 583.0674438476562], "spans": [[1, 3]], "text": "R101", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [225.2130889892578, 574.6928100585938, 242.9844970703125, 583.0674438476562], "spans": [[1, 4]], "text": "R101", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [260.5137939453125, 574.6928100585938, 277.702392578125, 583.0674438476562], "spans": [[1, 5]], "text": "v5x6", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [67.66300201416016, 563.3358154296875, 96.8486328125, 571.71044921875], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [135.32400512695312, 563.3358154296875, 155.0321502685547, 571.71044921875], "spans": [[2, 1]], "text": "84-89", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, 
"row-span": [2, 3]}, {"bbox": [167.95399475097656, 563.3358154296875, 182.43472290039062, 571.71044921875], "spans": [[2, 2]], "text": "68.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [194.04620361328125, 563.3358154296875, 208.52694702148438, 571.71044921875], "spans": [[2, 3]], "text": "71.5", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [226.8632354736328, 563.3358154296875, 241.34396362304688, 571.71044921875], "spans": [[2, 4]], "text": "70.1", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [261.8680419921875, 563.3358154296875, 276.3487854003906, 571.71044921875], "spans": [[2, 5]], "text": "77.7", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [67.66300201416016, 552.3768310546875, 100.16619873046875, 560.75146484375], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [135.32400512695312, 552.3768310546875, 155.0321502685547, 560.75146484375], "spans": [[3, 1]], "text": "83-91", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [167.95399475097656, 552.3768310546875, 182.43472290039062, 560.75146484375], "spans": [[3, 2]], "text": "70.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [194.04620361328125, 552.3768310546875, 208.52694702148438, 560.75146484375], "spans": [[3, 3]], "text": "71.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [226.8632354736328, 
552.3768310546875, 241.34396362304688, 560.75146484375], "spans": [[3, 4]], "text": "73.7", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [261.8680419921875, 552.3768310546875, 276.3487854003906, 560.75146484375], "spans": [[3, 5]], "text": "77.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [67.66300201416016, 541.4178466796875, 98.1756591796875, 549.79248046875], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [135.32400512695312, 541.4178466796875, 155.0321502685547, 549.79248046875], "spans": [[4, 1]], "text": "83-85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [167.95399475097656, 541.4178466796875, 182.43472290039062, 549.79248046875], "spans": [[4, 2]], "text": "60.1", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [194.04620361328125, 541.4178466796875, 208.52694702148438, 549.79248046875], "spans": [[4, 3]], "text": "63.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [226.8632354736328, 541.4178466796875, 241.34396362304688, 549.79248046875], "spans": [[4, 4]], "text": "63.5", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [261.8680419921875, 541.4178466796875, 276.3487854003906, 549.79248046875], "spans": [[4, 5]], "text": "66.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [67.66300201416016, 530.4588012695312, 100.54279327392578, 538.8334350585938], "spans": 
[[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [135.32400512695312, 530.4588012695312, 155.0321502685547, 538.8334350585938], "spans": [[5, 1]], "text": "87-88", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [167.95399475097656, 530.4588012695312, 182.43472290039062, 538.8334350585938], "spans": [[5, 2]], "text": "81.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [194.04620361328125, 530.4588012695312, 208.52694702148438, 538.8334350585938], "spans": [[5, 3]], "text": "80.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [226.8632354736328, 530.4588012695312, 241.34396362304688, 538.8334350585938], "spans": [[5, 4]], "text": "81.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [261.8680419921875, 530.4588012695312, 276.3487854003906, 538.8334350585938], "spans": [[5, 5]], "text": "86.2", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [67.66300201416016, 519.4998168945312, 110.19064331054688, 527.8744506835938], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [135.32400512695312, 519.4998168945312, 155.0321502685547, 527.8744506835938], "spans": [[6, 1]], "text": "93-94", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [167.95399475097656, 519.4998168945312, 182.43472290039062, 527.8744506835938], "spans": [[6, 2]], "text": "61.6", "type": 
"body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [194.04620361328125, 519.4998168945312, 208.52694702148438, 527.8744506835938], "spans": [[6, 3]], "text": "59.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [226.8632354736328, 519.4998168945312, 241.34396362304688, 527.8744506835938], "spans": [[6, 4]], "text": "58.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [261.8680419921875, 519.4998168945312, 276.3487854003906, 527.8744506835938], "spans": [[6, 5]], "text": "61.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [67.66300201416016, 508.54083251953125, 112.94332122802734, 516.9154663085938], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [135.32400512695312, 508.54083251953125, 155.0321502685547, 516.9154663085938], "spans": [[7, 1]], "text": "85-89", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [167.95399475097656, 508.54083251953125, 182.43472290039062, 516.9154663085938], "spans": [[7, 2]], "text": "71.9", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [194.04620361328125, 508.54083251953125, 208.52694702148438, 516.9154663085938], "spans": [[7, 3]], "text": "70.0", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [226.8632354736328, 508.54083251953125, 241.34396362304688, 516.9154663085938], "spans": [[7, 4]], "text": "72.0", "type": "body", "col": 4, "col-header": false, 
"col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [261.8680419921875, 508.54083251953125, 276.3487854003906, 516.9154663085938], "spans": [[7, 5]], "text": "67.9", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [67.66300201416016, 497.5818176269531, 93.64762878417969, 505.9564514160156], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [135.32400512695312, 497.5818176269531, 155.0321502685547, 505.9564514160156], "spans": [[8, 1]], "text": "69-71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [167.95399475097656, 497.5818176269531, 182.43472290039062, 505.9564514160156], "spans": [[8, 2]], "text": "71.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [194.04620361328125, 497.5818176269531, 208.52694702148438, 505.9564514160156], "spans": [[8, 3]], "text": "72.7", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [226.8632354736328, 497.5818176269531, 241.34396362304688, 505.9564514160156], "spans": [[8, 4]], "text": "72.0", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [261.8680419921875, 497.5818176269531, 276.3487854003906, 505.9564514160156], "spans": [[8, 5]], "text": "77.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [67.66300201416016, 486.6228332519531, 122.40287780761719, 494.9974670410156], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, 
"row-header": false, "row-span": [9, 10]}, {"bbox": [135.32400512695312, 486.6228332519531, 155.0321502685547, 494.9974670410156], "spans": [[9, 1]], "text": "83-84", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [167.95399475097656, 486.6228332519531, 182.43472290039062, 494.9974670410156], "spans": [[9, 2]], "text": "67.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [194.04620361328125, 486.6228332519531, 208.52694702148438, 494.9974670410156], "spans": [[9, 3]], "text": "69.3", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [226.8632354736328, 486.6228332519531, 241.34396362304688, 494.9974670410156], "spans": [[9, 4]], "text": "68.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [261.8680419921875, 486.6228332519531, 276.3487854003906, 494.9974670410156], "spans": [[9, 5]], "text": "74.6", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [67.66300201416016, 475.663818359375, 87.46977996826172, 484.0384521484375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [135.32400512695312, 475.663818359375, 155.0321502685547, 484.0384521484375], "spans": [[10, 1]], "text": "77-81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [167.95399475097656, 475.663818359375, 182.43472290039062, 484.0384521484375], "spans": [[10, 2]], "text": "82.2", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 
11]}, {"bbox": [194.04620361328125, 475.663818359375, 208.52694702148438, 484.0384521484375], "spans": [[10, 3]], "text": "82.9", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [226.8632354736328, 475.663818359375, 241.34396362304688, 484.0384521484375], "spans": [[10, 4]], "text": "82.2", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [261.8680419921875, 475.663818359375, 276.3487854003906, 484.0384521484375], "spans": [[10, 5]], "text": "86.3", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [67.66300201416016, 464.7058410644531, 83.62319946289062, 473.0804748535156], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [135.32400512695312, 464.7058410644531, 155.0321502685547, 473.0804748535156], "spans": [[11, 1]], "text": "84-86", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [167.95399475097656, 464.7058410644531, 182.43472290039062, 473.0804748535156], "spans": [[11, 2]], "text": "84.6", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [194.04620361328125, 464.7058410644531, 208.52694702148438, 473.0804748535156], "spans": [[11, 3]], "text": "85.8", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [226.8632354736328, 464.7058410644531, 241.34396362304688, 473.0804748535156], "spans": [[11, 4]], "text": "85.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": 
[261.8680419921875, 464.7058410644531, 276.3487854003906, 473.0804748535156], "spans": [[11, 5]], "text": "88.1", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [67.66300201416016, 453.746826171875, 84.65432739257812, 462.1214599609375], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [135.32400512695312, 453.746826171875, 155.0321502685547, 462.1214599609375], "spans": [[12, 1]], "text": "60-72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [167.95399475097656, 453.746826171875, 182.43472290039062, 462.1214599609375], "spans": [[12, 2]], "text": "76.7", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [194.04620361328125, 453.746826171875, 208.52694702148438, 462.1214599609375], "spans": [[12, 3]], "text": "80.4", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [226.8632354736328, 453.746826171875, 241.34396362304688, 462.1214599609375], "spans": [[12, 4]], "text": "79.9", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [261.8680419921875, 453.746826171875, 276.3487854003906, 462.1214599609375], "spans": [[12, 5]], "text": "82.7", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [67.66300201416016, 442.3888244628906, 78.62890625, 450.7634582519531], "spans": [[13, 0]], "text": "All", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [135.32400512695312, 
442.3888244628906, 155.0321502685547, 450.7634582519531], "spans": [[13, 1]], "text": "82-83", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [167.95399475097656, 442.3888244628906, 182.43472290039062, 450.7634582519531], "spans": [[13, 2]], "text": "72.4", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [194.04620361328125, 442.3888244628906, 208.52694702148438, 450.7634582519531], "spans": [[13, 3]], "text": "73.5", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [226.8632354736328, 442.3888244628906, 241.34396362304688, 450.7634582519531], "spans": [[13, 4]], "text": "73.4", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [261.8680419921875, 442.3888244628906, 276.3487854003906, 450.7634582519531], "spans": [[13, 5]], "text": "76.8", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [80.5073471069336, 496.419189453125, 267.3428649902344, 640.9814453125], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. 
The reduced label sets were obtained by either down-mapping or dropping labels.", "type": "table", "#-cols": 5, "#-rows": 13, "data": [[{"bbox": [86.37200164794922, 630.5248413085938, 129.4645233154297, 638.8994750976562], "spans": [[0, 0]], "text": "Class-count", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [151.07400512695312, 630.5248413085938, 159.41275024414062, 638.8994750976562], "spans": [[0, 1]], "text": "11", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [179.3181610107422, 630.5248413085938, 183.48753356933594, 638.8994750976562], "spans": [[0, 2]], "text": "6", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [213.33668518066406, 630.5248413085938, 217.5060577392578, 638.8994750976562], "spans": [[0, 3]], "text": "5", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [247.35520935058594, 630.5248413085938, 251.5245819091797, 638.8994750976562], "spans": [[0, 4]], "text": "4", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [86.37200164794922, 619.1678466796875, 115.55763244628906, 627.54248046875], "spans": [[1, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [151.07400512695312, 619.1678466796875, 159.41275024414062, 627.54248046875], "spans": [[1, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [173.42723083496094, 619.1678466796875, 189.38742065429688, 627.54248046875], "spans": [[1, 2]], "text": "Text", "type": "body", 
"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [207.4457550048828, 619.1678466796875, 223.40594482421875, 627.54248046875], "spans": [[1, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [241.4642791748047, 619.1678466796875, 257.4244689941406, 627.54248046875], "spans": [[1, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [86.37200164794922, 608.2088012695312, 118.87519836425781, 616.5834350585938], "spans": [[2, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [151.07400512695312, 608.2088012695312, 159.41275024414062, 616.5834350585938], "spans": [[2, 1]], "text": "71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [173.42723083496094, 608.2088012695312, 189.38742065429688, 616.5834350585938], "spans": [[2, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [207.4457550048828, 608.2088012695312, 223.40594482421875, 616.5834350585938], "spans": [[2, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [241.4642791748047, 608.2088012695312, 257.4244689941406, 616.5834350585938], "spans": [[2, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [86.37200164794922, 597.2498168945312, 116.88465881347656, 605.6244506835938], "spans": [[3, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 
1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [151.07400512695312, 597.2498168945312, 159.41275024414062, 605.6244506835938], "spans": [[3, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [173.42723083496094, 597.2498168945312, 189.38742065429688, 605.6244506835938], "spans": [[3, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [207.4457550048828, 597.2498168945312, 223.40594482421875, 605.6244506835938], "spans": [[3, 3]], "text": "Text", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [241.4642791748047, 597.2498168945312, 257.4244689941406, 605.6244506835938], "spans": [[3, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [86.37200164794922, 586.2908325195312, 119.25179290771484, 594.6654663085938], "spans": [[4, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [151.07400512695312, 586.2908325195312, 159.41275024414062, 594.6654663085938], "spans": [[4, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [173.42723083496094, 586.2908325195312, 189.38742065429688, 594.6654663085938], "spans": [[4, 2]], "text": "Text", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [211.2564697265625, 586.2908325195312, 219.59521484375, 594.6654663085938], "spans": [[4, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, 
{"bbox": [241.46426391601562, 586.2908325195312, 257.4244689941406, 594.6654663085938], "spans": [[4, 4]], "text": "Text", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [86.37200164794922, 575.3318481445312, 128.89964294433594, 583.7064819335938], "spans": [[5, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [151.07400512695312, 575.3318481445312, 159.41275024414062, 583.7064819335938], "spans": [[5, 1]], "text": "62", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [177.23794555664062, 575.3318481445312, 185.57669067382812, 583.7064819335938], "spans": [[5, 2]], "text": "62", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [213.9105224609375, 575.3318481445312, 216.941162109375, 583.7064819335938], "spans": [[5, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [247.92904663085938, 575.3318481445312, 250.95968627929688, 583.7064819335938], "spans": [[5, 4]], "text": "-", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [86.37200164794922, 564.372802734375, 131.65231323242188, 572.7474365234375], "spans": [[6, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [151.07400512695312, 564.372802734375, 159.41275024414062, 572.7474365234375], "spans": [[6, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [177.23794555664062, 
564.372802734375, 185.57669067382812, 572.7474365234375], "spans": [[6, 2]], "text": "68", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [213.9105224609375, 564.372802734375, 216.941162109375, 572.7474365234375], "spans": [[6, 3]], "text": "-", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [247.92904663085938, 564.372802734375, 250.95968627929688, 572.7474365234375], "spans": [[6, 4]], "text": "-", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [86.37200164794922, 553.413818359375, 112.35662841796875, 561.7884521484375], "spans": [[7, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [151.07400512695312, 553.413818359375, 159.41275024414062, 561.7884521484375], "spans": [[7, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [177.23794555664062, 553.413818359375, 185.57669067382812, 561.7884521484375], "spans": [[7, 2]], "text": "72", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [211.25645446777344, 553.413818359375, 219.59519958496094, 561.7884521484375], "spans": [[7, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [245.27496337890625, 553.413818359375, 253.61370849609375, 561.7884521484375], "spans": [[7, 4]], "text": "72", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [86.37200164794922, 542.455810546875, 141.11187744140625, 550.8304443359375], "spans": [[8, 
0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [151.07400512695312, 542.455810546875, 159.41275024414062, 550.8304443359375], "spans": [[8, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [177.23794555664062, 542.455810546875, 185.57669067382812, 550.8304443359375], "spans": [[8, 2]], "text": "67", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [211.25645446777344, 542.455810546875, 219.59519958496094, 550.8304443359375], "spans": [[8, 3]], "text": "69", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [245.27496337890625, 542.455810546875, 253.61370849609375, 550.8304443359375], "spans": [[8, 4]], "text": "68", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [86.37200164794922, 531.496826171875, 106.17877960205078, 539.8714599609375], "spans": [[9, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [151.07400512695312, 531.496826171875, 159.41275024414062, 539.8714599609375], "spans": [[9, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [177.23794555664062, 531.496826171875, 185.57669067382812, 539.8714599609375], "spans": [[9, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [211.25645446777344, 531.496826171875, 219.59519958496094, 539.8714599609375], "spans": [[9, 3]], "text": "82", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [245.27496337890625, 531.496826171875, 253.61370849609375, 539.8714599609375], "spans": [[9, 4]], "text": "82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [86.37200164794922, 520.537841796875, 102.33219909667969, 528.9124755859375], "spans": [[10, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [151.07400512695312, 520.537841796875, 159.41275024414062, 528.9124755859375], "spans": [[10, 1]], "text": "85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [177.23794555664062, 520.537841796875, 185.57669067382812, 528.9124755859375], "spans": [[10, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [211.25645446777344, 520.537841796875, 219.59519958496094, 528.9124755859375], "spans": [[10, 3]], "text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [245.27496337890625, 520.537841796875, 253.61370849609375, 528.9124755859375], "spans": [[10, 4]], "text": "84", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [86.37200164794922, 509.5788269042969, 103.36332702636719, 517.9534301757812], "spans": [[11, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [151.07400512695312, 509.5788269042969, 159.41275024414062, 517.9534301757812], "spans": [[11, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [169.37442016601562, 509.5788269042969, 193.4312744140625, 517.9534301757812], "spans": [[11, 2]], "text": "Sec.-h.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [203.3929443359375, 509.5788269042969, 227.44979858398438, 517.9534301757812], "spans": [[11, 3]], "text": "Sec.-h.", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [237.41146850585938, 509.5788269042969, 261.46832275390625, 517.9534301757812], "spans": [[11, 4]], "text": "Sec.-h.", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [86.37200164794922, 498.2208251953125, 113.3160171508789, 506.595458984375], "spans": [[12, 0]], "text": "Overall", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [151.07400512695312, 498.2208251953125, 159.41275024414062, 506.595458984375], "spans": [[12, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [177.23794555664062, 498.2208251953125, 185.57669067382812, 506.595458984375], "spans": [[12, 2]], "text": "73", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [211.25645446777344, 498.2208251953125, 219.59519958496094, 506.595458984375], "spans": [[12, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [245.27496337890625, 498.2208251953125, 253.61370849609375, 506.595458984375], "spans": [[12, 4]], "text": "77", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], 
"row": 12, "row-header": false, "row-span": [12, 13]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [353.065185546875, 485.2873840332031, 523.3069458007812, 641.25341796875], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement.", "type": "table", "#-cols": 5, "#-rows": 14, "data": [[{"bbox": [358.6390075683594, 630.5248413085938, 401.7315368652344, 638.8994750976562], "spans": [[0, 0]], "text": "Class-count", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [440.2250061035156, 630.5248413085938, 448.5637512207031, 638.8994750976562], "spans": [[0, 1], [0, 2]], "text": "11", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [440.2250061035156, 630.5248413085938, 448.5637512207031, 638.8994750976562], "spans": [[0, 1], [0, 2]], "text": "11", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [494.3800048828125, 630.5248413085938, 498.54937744140625, 638.8994750976562], "spans": [[0, 3], [0, 4]], "text": "5", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [494.3800048828125, 630.5248413085938, 498.54937744140625, 638.8994750976562], "spans": [[0, 3], [0, 4]], "text": "5", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [358.6390075683594, 619.5658569335938, 375.27166748046875, 627.9404907226562], "spans": [[1, 0]], "text": "Split", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": 
[423.34100341796875, 619.5658569335938, 438.0458984375, 627.9404907226562], "spans": [[1, 1]], "text": "Doc", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [448.007568359375, 619.5658569335938, 465.44720458984375, 627.9404907226562], "spans": [[1, 2]], "text": "Page", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [475.4110107421875, 619.5658569335938, 490.11590576171875, 627.9404907226562], "spans": [[1, 3]], "text": "Doc", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [500.07757568359375, 619.5658569335938, 517.5172119140625, 627.9404907226562], "spans": [[1, 4]], "text": "Page", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [358.6390075683594, 608.2088012695312, 387.82464599609375, 616.5834350585938], "spans": [[2, 0]], "text": "Caption", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [426.52398681640625, 608.2088012695312, 434.86273193359375, 616.5834350585938], "spans": [[2, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [452.5624084472656, 608.2088012695312, 460.9011535644531, 616.5834350585938], "spans": [[2, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, 
"row-header": false, "row-span": [2, 3]}], [{"bbox": [358.6390075683594, 597.2498168945312, 391.1422119140625, 605.6244506835938], "spans": [[3, 0]], "text": "Footnote", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [426.52398681640625, 597.2498168945312, 434.86273193359375, 605.6244506835938], "spans": [[3, 1]], "text": "71", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [452.5624084472656, 597.2498168945312, 460.9011535644531, 605.6244506835938], "spans": [[3, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [358.6390075683594, 586.2908325195312, 389.15167236328125, 594.6654663085938], "spans": [[4, 0]], "text": "Formula", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [426.52398681640625, 586.2908325195312, 434.86273193359375, 594.6654663085938], "spans": [[4, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [452.5624084472656, 586.2908325195312, 460.9011535644531, 594.6654663085938], "spans": [[4, 2]], "text": "66", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 
5]}, {"bbox": null, "spans": [[4, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [358.6390075683594, 575.3318481445312, 391.518798828125, 583.7064819335938], "spans": [[5, 0]], "text": "List-item", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [426.52398681640625, 575.3318481445312, 434.86273193359375, 583.7064819335938], "spans": [[5, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [452.5624084472656, 575.3318481445312, 460.9011535644531, 583.7064819335938], "spans": [[5, 2]], "text": "88", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [478.593994140625, 575.3318481445312, 486.9327392578125, 583.7064819335938], "spans": [[5, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [504.6324157714844, 575.3318481445312, 512.97119140625, 583.7064819335938], "spans": [[5, 4]], "text": "88", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [358.6390075683594, 564.372802734375, 401.1666564941406, 572.7474365234375], "spans": [[6, 0]], "text": "Page-footer", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [426.52398681640625, 564.372802734375, 434.86273193359375, 572.7474365234375], "spans": [[6, 1]], "text": "62", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [452.5624084472656, 564.372802734375, 460.9011535644531, 572.7474365234375], "spans": [[6, 2]], "text": "89", "type": 
"body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [358.6390075683594, 553.413818359375, 403.9193115234375, 561.7884521484375], "spans": [[7, 0]], "text": "Page-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [426.52398681640625, 553.413818359375, 434.86273193359375, 561.7884521484375], "spans": [[7, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [452.5624084472656, 553.413818359375, 460.9011535644531, 561.7884521484375], "spans": [[7, 2]], "text": "90", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [358.6390075683594, 542.455810546875, 384.6236572265625, 550.8304443359375], "spans": [[8, 0]], "text": "Picture", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [426.52398681640625, 542.455810546875, 434.86273193359375, 550.8304443359375], "spans": [[8, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, 
{"bbox": [452.5624084472656, 542.455810546875, 460.9011535644531, 550.8304443359375], "spans": [[8, 2]], "text": "82", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [478.593994140625, 542.455810546875, 486.9327392578125, 550.8304443359375], "spans": [[8, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [504.6324157714844, 542.455810546875, 512.97119140625, 550.8304443359375], "spans": [[8, 4]], "text": "82", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [358.6390075683594, 531.496826171875, 413.37890625, 539.8714599609375], "spans": [[9, 0]], "text": "Section-header", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [426.52398681640625, 531.496826171875, 434.86273193359375, 539.8714599609375], "spans": [[9, 1]], "text": "68", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [452.5624084472656, 531.496826171875, 460.9011535644531, 539.8714599609375], "spans": [[9, 2]], "text": "83", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [478.593994140625, 531.496826171875, 486.9327392578125, 539.8714599609375], "spans": [[9, 3]], "text": "69", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [504.6324157714844, 531.496826171875, 512.97119140625, 539.8714599609375], "spans": [[9, 4]], "text": "83", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [358.6390075683594, 520.537841796875, 378.4457702636719, 
528.9124755859375], "spans": [[10, 0]], "text": "Table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [426.52398681640625, 520.537841796875, 434.86273193359375, 528.9124755859375], "spans": [[10, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [452.5624084472656, 520.537841796875, 460.9011535644531, 528.9124755859375], "spans": [[10, 2]], "text": "89", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [478.593994140625, 520.537841796875, 486.9327392578125, 528.9124755859375], "spans": [[10, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [504.6324157714844, 520.537841796875, 512.97119140625, 528.9124755859375], "spans": [[10, 4]], "text": "90", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [358.6390075683594, 509.5788269042969, 374.5992126464844, 517.9534301757812], "spans": [[11, 0]], "text": "Text", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [426.52398681640625, 509.5788269042969, 434.86273193359375, 517.9534301757812], "spans": [[11, 1]], "text": "85", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [452.5624084472656, 509.5788269042969, 460.9011535644531, 517.9534301757812], "spans": [[11, 2]], "text": "91", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [478.593994140625, 509.5788269042969, 486.9327392578125, 517.9534301757812], "spans": [[11, 3]], 
"text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [504.6324157714844, 509.5788269042969, 512.97119140625, 517.9534301757812], "spans": [[11, 4]], "text": "90", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [358.6390075683594, 498.6198425292969, 375.6303405761719, 506.9944763183594], "spans": [[12, 0]], "text": "Title", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [426.52398681640625, 498.6198425292969, 434.86273193359375, 506.9944763183594], "spans": [[12, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [452.5624084472656, 498.6198425292969, 460.9011535644531, 506.9944763183594], "spans": [[12, 2]], "text": "81", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [358.6390075683594, 487.2628173828125, 369.60491943359375, 495.637451171875], "spans": [[13, 0]], "text": "All", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [426.52398681640625, 487.2628173828125, 434.86273193359375, 495.637451171875], "spans": [[13, 1]], "text": "72", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [452.5624084472656, 
487.2628173828125, 460.9011535644531, 495.637451171875], "spans": [[13, 2]], "text": "84", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [478.593994140625, 487.2628173828125, 486.9327392578125, 495.637451171875], "spans": [[13, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [504.6324157714844, 487.2628173828125, 512.97119140625, 495.637451171875], "spans": [[13, 4]], "text": "87", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [72.87370300292969, 452.12615966796875, 274.87945556640625, 619.3699951171875], "page": 8, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. 
By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.", "type": "table", "#-cols": 4, "#-rows": 15, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [217.74099731445312, 608.6068115234375, 256.2606506347656, 616.9814453125], "spans": [[0, 1], [0, 2], [0, 3]], "text": "Testing on", "type": "col_header", "col": 3, "col-header": false, "col-span": [1, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [154.62899780273438, 597.6488037109375, 175.4758758544922, 606.0234375], "spans": [[1, 0]], "text": "labels", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [204.69000244140625, 597.6488037109375, 220.5426025390625, 606.0234375], "spans": [[1, 1]], "text": "PLN", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [230.5042724609375, 597.6488037109375, 242.0619659423828, 606.0234375], "spans": [[1, 2]], "text": "DB", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [252.0236358642578, 597.6488037109375, 269.31085205078125, 
606.0234375], "spans": [[1, 3]], "text": "DLN", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [154.62899780273438, 586.2908325195312, 177.9237060546875, 594.6654663085938], "spans": [[2, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [208.44700622558594, 586.2908325195312, 216.78575134277344, 594.6654663085938], "spans": [[2, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [232.11830139160156, 586.2908325195312, 240.45704650878906, 594.6654663085938], "spans": [[2, 2]], "text": "43", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [256.4979248046875, 586.2908325195312, 264.836669921875, 594.6654663085938], "spans": [[2, 3]], "text": "23", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [154.62899780273438, 575.3318481445312, 194.72674560546875, 583.7064819335938], "spans": [[3, 0]], "text": "Sec-header", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [208.44700622558594, 575.3318481445312, 216.78575134277344, 583.7064819335938], "spans": [[3, 1]], "text": "87", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [234.77235412597656, 575.3318481445312, 237.80299377441406, 583.7064819335938], "spans": [[3, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [256.4979248046875, 575.3318481445312, 264.836669921875, 583.7064819335938], "spans": [[3, 3]], "text": "32", "type": 
"body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [154.62899780273438, 564.372802734375, 174.43577575683594, 572.7474365234375], "spans": [[4, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [208.44700622558594, 564.372802734375, 216.78575134277344, 572.7474365234375], "spans": [[4, 1]], "text": "95", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [232.11830139160156, 564.372802734375, 240.45704650878906, 572.7474365234375], "spans": [[4, 2]], "text": "24", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [256.4979248046875, 564.372802734375, 264.836669921875, 572.7474365234375], "spans": [[4, 3]], "text": "49", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [154.62899780273438, 553.413818359375, 170.5891876220703, 561.7884521484375], "spans": [[5, 0]], "text": "Text", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [208.44700622558594, 553.413818359375, 216.78575134277344, 561.7884521484375], "spans": [[5, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [234.77235412597656, 553.413818359375, 237.80299377441406, 561.7884521484375], "spans": [[5, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [256.4979248046875, 553.413818359375, 264.836669921875, 561.7884521484375], "spans": [[5, 3]], "text": "42", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, 
"row-header": false, "row-span": [5, 6]}], [{"bbox": [154.62899780273438, 542.455810546875, 171.27960205078125, 550.8304443359375], "spans": [[6, 0]], "text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [208.44700622558594, 542.455810546875, 216.78575134277344, 550.8304443359375], "spans": [[6, 1]], "text": "93", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [232.11830139160156, 542.455810546875, 240.45704650878906, 550.8304443359375], "spans": [[6, 2]], "text": "34", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [256.4979248046875, 542.455810546875, 264.836669921875, 550.8304443359375], "spans": [[6, 3]], "text": "30", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [154.62899780273438, 531.0978393554688, 177.9237060546875, 539.4724731445312], "spans": [[7, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [208.44700622558594, 531.0978393554688, 216.78575134277344, 539.4724731445312], "spans": [[7, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [232.11830139160156, 531.0978393554688, 240.45704650878906, 539.4724731445312], "spans": [[7, 2]], "text": "71", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [256.4979248046875, 531.0978393554688, 264.836669921875, 539.4724731445312], "spans": [[7, 3]], "text": "31", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": 
[154.62899780273438, 520.1388549804688, 174.43577575683594, 528.5134887695312], "spans": [[8, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [208.44700622558594, 520.1388549804688, 216.78575134277344, 528.5134887695312], "spans": [[8, 1]], "text": "19", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [232.11830139160156, 520.1388549804688, 240.45704650878906, 528.5134887695312], "spans": [[8, 2]], "text": "65", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [256.4979248046875, 520.1388549804688, 264.836669921875, 528.5134887695312], "spans": [[8, 3]], "text": "22", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [154.62899780273438, 509.1798400878906, 171.27960205078125, 517.554443359375], "spans": [[9, 0]], "text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [208.44700622558594, 509.1798400878906, 216.78575134277344, 517.554443359375], "spans": [[9, 1]], "text": "48", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [232.11830139160156, 509.1798400878906, 240.45704650878906, 517.554443359375], "spans": [[9, 2]], "text": "68", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [256.4979248046875, 509.1798400878906, 264.836669921875, 517.554443359375], "spans": [[9, 3]], "text": "27", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [154.62899780273438, 497.82281494140625, 177.9237060546875, 
506.19744873046875], "spans": [[10, 0]], "text": "Figure", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [208.44700622558594, 497.82281494140625, 216.78575134277344, 506.19744873046875], "spans": [[10, 1]], "text": "67", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [232.11830139160156, 497.82281494140625, 240.45704650878906, 506.19744873046875], "spans": [[10, 2]], "text": "51", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [256.4979248046875, 497.82281494140625, 264.836669921875, 506.19744873046875], "spans": [[10, 3]], "text": "72", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [154.62899780273438, 486.86383056640625, 194.72674560546875, 495.23846435546875], "spans": [[11, 0]], "text": "Sec-header", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [208.44700622558594, 486.86383056640625, 216.78575134277344, 495.23846435546875], "spans": [[11, 1]], "text": "53", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [234.77235412597656, 486.86383056640625, 237.80299377441406, 495.23846435546875], "spans": [[11, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [256.4979248046875, 486.86383056640625, 264.836669921875, 495.23846435546875], "spans": [[11, 3]], "text": "68", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [154.62899780273438, 475.9048156738281, 174.43577575683594, 
484.2794494628906], "spans": [[12, 0]], "text": "Table", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [208.44700622558594, 475.9048156738281, 216.78575134277344, 484.2794494628906], "spans": [[12, 1]], "text": "87", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [232.11830139160156, 475.9048156738281, 240.45704650878906, 484.2794494628906], "spans": [[12, 2]], "text": "43", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [256.4979248046875, 475.9048156738281, 264.836669921875, 484.2794494628906], "spans": [[12, 3]], "text": "82", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [154.62899780273438, 464.9458312988281, 170.5891876220703, 473.3204650878906], "spans": [[13, 0]], "text": "Text", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [208.44700622558594, 464.9458312988281, 216.78575134277344, 473.3204650878906], "spans": [[13, 1]], "text": "77", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [234.77235412597656, 464.9458312988281, 237.80299377441406, 473.3204650878906], "spans": [[13, 2]], "text": "-", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [256.4979248046875, 464.9458312988281, 264.836669921875, 473.3204650878906], "spans": [[13, 3]], "text": "84", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [154.62899780273438, 453.98681640625, 171.27960205078125, 462.3614501953125], "spans": [[14, 0]], 
"text": "total", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [208.44700622558594, 453.98681640625, 216.78575134277344, 462.3614501953125], "spans": [[14, 1]], "text": "59", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [232.11830139160156, 453.98681640625, 240.45704650878906, 462.3614501953125], "spans": [[14, 2]], "text": "47", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [256.4979248046875, 453.98681640625, 264.836669921875, 462.3614501953125], "spans": [[14, 3]], "text": "78", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 14, "row-header": false, "row-span": [14, 15]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/2305.03393v1-pg9.doctags.txt b/tests/data/2305.03393v1-pg9.doctags.txt new file mode 100644 index 00000000..d9749b6e --- /dev/null +++ b/tests/data/2305.03393v1-pg9.doctags.txt @@ -0,0 +1,20 @@ + +order to compute the TED score. Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz. 
+5.1 Hyper Parameter Optimization +We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML. +Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart. + + + +##LanguageTEDsTEDsTEDsmAPInference +enc-layersdec-layersLanguagesimplecomplexall(0.75)time (secs) +66OTSL HTML0.965 0.9690.934 0.9270.955 0.9550.88 0.8572.73 5.39 +44OTSL HTML0.938 0.9520.9040.9270.8531.97 +OTSL HTML0.9230.909 0.897 0.9010.938 0.9150.8433.77 +240.9450.9310.859 0.8341.91 3.81 +42OTSL HTML0.952 0.9440.92 0.9030.942 0.9310.857 0.8241.22 2 +
Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.
+5.2 Quantitative Results +We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables. +Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation. +
\ No newline at end of file diff --git a/tests/data/2305.03393v1-pg9.json b/tests/data/2305.03393v1-pg9.json index 3c96e32c..cfd8e7e4 100644 --- a/tests/data/2305.03393v1-pg9.json +++ b/tests/data/2305.03393v1-pg9.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2305.03393v1-pg9.pdf", "filename-prov": null, "document-hash": "a07f5c34601ba2c234d898cbfaa9e29a7045996ccd82ccab3012516220a1f3a4", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "16ccd0a495625bd9c7a28a4b353d85137f3e6b09508a0d2280663478de9c9b25", "model": "default", "page": 1}]}, "main-text": [{"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [193.9645538330078, 689.2177734375, 447.5447692871094, 700.5064697265625], "page": 1, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "9", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [475.1263732910156, 689.2177734375, 480.5931396484375, 700.5064697265625], "page": 1, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "order to compute the TED score. 
Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.8929443359375, 639.093017578125, 480.79583740234375, 675.5369873046875], "page": 1, "span": [0, 163], "__ref_s3_data": null}]}, {"text": "5.1 Hyper Parameter Optimization", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.27793884277344, 612.7918090820312, 318.4514465332031, 625.2948608398438], "page": 1, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.84170532226562, 536.5759887695312, 481.2436218261719, 608.8849487304688], "page": 1, "span": [0, 423], "__ref_s3_data": null}]}, {"text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. 
Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [133.8990936279297, 464.017822265625, 480.7420349121094, 519.2052612304688], "page": 1, "span": [0, 398], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "5.2 Quantitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.489013671875, 273.8258056640625, 264.4082946777344, 286.3288879394531], "page": 1, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.97596740722656, 173.6999969482422, 480.8291931152344, 269.9199523925781], "page": 1, "span": [0, 555], "__ref_s3_data": null}]}, {"text": "Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. 
Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.89259338378906, 125.87999725341797, 480.9114074707031, 174.2779541015625], "page": 1, "span": [0, 289], "__ref_s3_data": null}]}], "figures": [], "tables": [{"bounding-box": null, "prov": [{"bbox": [139.83172607421875, 322.2643737792969, 474.81011962890625, 454.8448791503906], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "table", "#-cols": 8, "#-rows": 7, "data": [[{"bbox": [160.3699951171875, 441.2538146972656, 168.04522705078125, 452.5425109863281], "spans": [[0, 0]], "text": "#", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [207.9739990234375, 441.2538146972656, 215.64923095703125, 452.5425109863281], "spans": [[0, 1]], "text": "#", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [239.79800415039062, 435.7748107910156, 278.33380126953125, 447.0635070800781], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": 
[0, 1]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 5, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [396.27099609375, 441.2538146972656, 417.1259460449219, 452.5425109863281], "spans": [[0, 6]], "text": "mAP", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [430.77099609375, 441.2538146972656, 467.14141845703125, 452.5425109863281], "spans": [[0, 7]], "text": "Inference", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [144.5919952392578, 428.3028259277344, 183.82894897460938, 439.5915222167969], "spans": [[1, 0]], "text": "enc-layers", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.19500732421875, 428.3028259277344, 231.42303466796875, 439.5915222167969], "spans": [[1, 1]], "text": "dec-layers", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [239.79800415039062, 435.7748107910156, 278.33380126953125, 447.0635070800781], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [286.6860046386719, 428.3028259277344, 312.328125, 439.5915222167969], "spans": [[1, 3]], "text": "simple", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 
1, "row-header": false, "row-span": [1, 2]}, {"bbox": [320.7019958496094, 428.3028259277344, 353.71539306640625, 439.5915222167969], "spans": [[1, 4]], "text": "complex", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [369.3059997558594, 428.3028259277344, 379.0291442871094, 439.5915222167969], "spans": [[1, 5]], "text": "all", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [394.927001953125, 430.2948303222656, 418.4692077636719, 441.5835266113281], "spans": [[1, 6]], "text": "(0.75)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.14801025390625, 430.2948303222656, 470.7695617675781, 441.5835266113281], "spans": [[1, 7]], "text": "time (secs)", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [161.906005859375, 409.4728088378906, 166.51473999023438, 420.7615051269531], "spans": [[2, 0]], "text": "6", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [209.50900268554688, 409.4728088378906, 214.11773681640625, 420.7615051269531], "spans": [[2, 1]], "text": "6", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [245.17599487304688, 402.0008239746094, 272.9449462890625, 426.24151611328125], "spans": [[2, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.0169982910156, 402.0008239746094, 310.00732421875, 426.24151611328125], "spans": [[2, 3]], "text": "0.965 0.969", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, 
"row-span": [2, 3]}, {"bbox": [326.7170104980469, 402.0008239746094, 347.70733642578125, 426.24151611328125], "spans": [[2, 4]], "text": "0.934 0.927", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [363.6759948730469, 402.0008239746094, 384.66632080078125, 426.24151611328125], "spans": [[2, 5]], "text": "0.955 0.955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.20599365234375, 402.0008239746094, 417.1963195800781, 426.3042907714844], "spans": [[2, 6]], "text": "0.88 0.857", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [439.5270080566406, 402.0008239746094, 458.38336181640625, 426.3042907714844], "spans": [[2, 7]], "text": "2.73 5.39", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.906005859375, 383.17181396484375, 166.51473999023438, 394.46051025390625], "spans": [[3, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [209.50900268554688, 383.17181396484375, 214.11773681640625, 394.46051025390625], "spans": [[3, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [245.17599487304688, 375.6998291015625, 272.9449462890625, 399.93951416015625], "spans": [[3, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.0169982910156, 375.6998291015625, 310.00732421875, 399.93951416015625], "spans": [[3, 3]], "text": "0.938 0.952", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, 
{"bbox": [326.7170104980469, 388.65081787109375, 347.70733642578125, 399.93951416015625], "spans": [[3, 4]], "text": "0.904", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [363.6759948730469, 388.65081787109375, 384.66632080078125, 399.93951416015625], "spans": [[3, 5]], "text": "0.927", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.6180114746094, 388.5970153808594, 418.7779846191406, 400.0022888183594], "spans": [[3, 6]], "text": "0.853", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [439.5270080566406, 388.5970153808594, 458.38336181640625, 400.0022888183594], "spans": [[3, 7]], "text": "1.97", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": null, "spans": [[4, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [245.17599487304688, 349.3988342285156, 272.9449462890625, 373.6385192871094], "spans": [[4, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.0169982910156, 362.3498229980469, 310.00732421875, 373.6385192871094], "spans": [[4, 3]], "text": "0.923", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [326.7170104980469, 349.3988342285156, 347.70733642578125, 386.988525390625], "spans": [[4, 4]], "text": "0.909 0.897 0.901", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], 
"row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [362.0880126953125, 362.3498229980469, 386.24798583984375, 387.0513000488281], "spans": [[4, 5]], "text": "0.938 0.915", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [396.20599365234375, 375.6998291015625, 417.1963195800781, 386.988525390625], "spans": [[4, 6]], "text": "0.843", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [440.7669982910156, 375.6998291015625, 457.150390625, 386.988525390625], "spans": [[4, 7]], "text": "3.77", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [161.906005859375, 356.8708190917969, 166.51473999023438, 368.1595153808594], "spans": [[5, 0]], "text": "2", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [209.50900268554688, 356.8708190917969, 214.11773681640625, 368.1595153808594], "spans": [[5, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [289.0169982910156, 349.3988342285156, 310.00732421875, 360.6875305175781], "spans": [[5, 3]], "text": "0.945", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [362.0880126953125, 349.34503173828125, 386.24798583984375, 360.75030517578125], "spans": [[5, 5]], "text": "0.931", "type": "body", "col": 5, "col-header": 
false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.6180114746094, 349.3988342285156, 418.7779846191406, 373.7012939453125], "spans": [[5, 6]], "text": "0.859 0.834", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [439.5270080566406, 349.3988342285156, 458.38336181640625, 373.7012939453125], "spans": [[5, 7]], "text": "1.91 3.81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.906005859375, 330.5688171386719, 166.51473999023438, 341.8575134277344], "spans": [[6, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [209.50900268554688, 330.5688171386719, 214.11773681640625, 341.8575134277344], "spans": [[6, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [245.17599487304688, 323.0968322753906, 272.9449462890625, 347.3375244140625], "spans": [[6, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [289.0169982910156, 323.0968322753906, 310.00732421875, 347.3375244140625], "spans": [[6, 3]], "text": "0.952 0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [326.7170104980469, 323.0968322753906, 347.70733642578125, 347.3375244140625], "spans": [[6, 4]], "text": "0.92 0.903", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [362.0880126953125, 323.0968322753906, 386.24798583984375, 347.4002990722656], "spans": [[6, 5]], "text": "0.942 0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 
6, "row-header": false, "row-span": [6, 7]}, {"bbox": [394.6180114746094, 323.0968322753906, 418.7779846191406, 347.4002990722656], "spans": [[6, 6]], "text": "0.857 0.824", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [439.5270080566406, 323.0968322753906, 458.38336181640625, 347.4002990722656], "spans": [[6, 7]], "text": "1.22 2", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2305.03393v1-pg9.pdf", "filename-prov": null, "document-hash": "a07f5c34601ba2c234d898cbfaa9e29a7045996ccd82ccab3012516220a1f3a4", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "16ccd0a495625bd9c7a28a4b353d85137f3e6b09508a0d2280663478de9c9b25", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [193.9645538330078, 689.2177734375, 447.5447692871094, 700.5064697265625], "page": 1, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [475.1263732910156, 689.2177734375, 480.5931396484375, 700.5064697265625], "page": 1, "span": 
[0, 1], "__ref_s3_data": null}], "text": "9", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.8929443359375, 639.093017578125, 480.79583740234375, 675.5369873046875], "page": 1, "span": [0, 163], "__ref_s3_data": null}], "text": "order to compute the TED score. Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.27793884277344, 612.7918090820312, 318.4514465332031, 625.2948608398438], "page": 1, "span": [0, 32], "__ref_s3_data": null}], "text": "5.1 Hyper Parameter Optimization", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.84170532226562, 536.5759887695312, 481.2436218261719, 608.8849487304688], "page": 1, "span": [0, 423], "__ref_s3_data": null}], "text": "We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.8990936279297, 464.017822265625, 480.7420349121094, 519.2052612304688], "page": 1, "span": [0, 398], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. 
Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [134.489013671875, 273.8258056640625, 264.4082946777344, 286.3288879394531], "page": 1, "span": [0, 24], "__ref_s3_data": null}], "text": "5.2 Quantitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.97596740722656, 173.6999969482422, 480.8291931152344, 269.9199523925781], "page": 1, "span": [0, 555], "__ref_s3_data": null}], "text": "We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.89259338378906, 125.87999725341797, 480.9114074707031, 174.2779541015625], "page": 1, "span": [0, 289], "__ref_s3_data": null}], "text": "Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. 
Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation.", "type": "paragraph", "name": "Text", "font": null}], "figures": [], "tables": [{"prov": [{"bbox": [139.83172607421875, 322.2643737792969, 474.81011962890625, 454.8448791503906], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "table", "#-cols": 8, "#-rows": 7, "data": [[{"bbox": [160.3699951171875, 441.2538146972656, 168.04522705078125, 452.5425109863281], "spans": [[0, 0]], "text": "#", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [207.9739990234375, 441.2538146972656, 215.64923095703125, 452.5425109863281], "spans": [[0, 1]], "text": "#", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [239.79800415039062, 435.7748107910156, 278.33380126953125, 447.0635070800781], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", 
"col": 4, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 441.2538146972656, 348.2641906738281, 452.5425109863281], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 5, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [396.27099609375, 441.2538146972656, 417.1259460449219, 452.5425109863281], "spans": [[0, 6]], "text": "mAP", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [430.77099609375, 441.2538146972656, 467.14141845703125, 452.5425109863281], "spans": [[0, 7]], "text": "Inference", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [144.5919952392578, 428.3028259277344, 183.82894897460938, 439.5915222167969], "spans": [[1, 0]], "text": "enc-layers", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.19500732421875, 428.3028259277344, 231.42303466796875, 439.5915222167969], "spans": [[1, 1]], "text": "dec-layers", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [239.79800415039062, 435.7748107910156, 278.33380126953125, 447.0635070800781], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [286.6860046386719, 428.3028259277344, 312.328125, 439.5915222167969], "spans": [[1, 3]], "text": "simple", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [320.7019958496094, 428.3028259277344, 353.71539306640625, 439.5915222167969], "spans": [[1, 4]], "text": 
"complex", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [369.3059997558594, 428.3028259277344, 379.0291442871094, 439.5915222167969], "spans": [[1, 5]], "text": "all", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [394.927001953125, 430.2948303222656, 418.4692077636719, 441.5835266113281], "spans": [[1, 6]], "text": "(0.75)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.14801025390625, 430.2948303222656, 470.7695617675781, 441.5835266113281], "spans": [[1, 7]], "text": "time (secs)", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [161.906005859375, 409.4728088378906, 166.51473999023438, 420.7615051269531], "spans": [[2, 0]], "text": "6", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [209.50900268554688, 409.4728088378906, 214.11773681640625, 420.7615051269531], "spans": [[2, 1]], "text": "6", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [245.17599487304688, 402.0008239746094, 272.9449462890625, 426.24151611328125], "spans": [[2, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.0169982910156, 402.0008239746094, 310.00732421875, 426.24151611328125], "spans": [[2, 3]], "text": "0.965 0.969", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [326.7170104980469, 402.0008239746094, 347.70733642578125, 426.24151611328125], "spans": [[2, 4]], "text": "0.934 0.927", "type": 
"body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [363.6759948730469, 402.0008239746094, 384.66632080078125, 426.24151611328125], "spans": [[2, 5]], "text": "0.955 0.955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.20599365234375, 402.0008239746094, 417.1963195800781, 426.3042907714844], "spans": [[2, 6]], "text": "0.88 0.857", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [439.5270080566406, 402.0008239746094, 458.38336181640625, 426.3042907714844], "spans": [[2, 7]], "text": "2.73 5.39", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.906005859375, 383.17181396484375, 166.51473999023438, 394.46051025390625], "spans": [[3, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [209.50900268554688, 383.17181396484375, 214.11773681640625, 394.46051025390625], "spans": [[3, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [245.17599487304688, 375.6998291015625, 272.9449462890625, 399.93951416015625], "spans": [[3, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.0169982910156, 375.6998291015625, 310.00732421875, 399.93951416015625], "spans": [[3, 3]], "text": "0.938 0.952", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [326.7170104980469, 388.65081787109375, 347.70733642578125, 399.93951416015625], "spans": [[3, 4]], "text": "0.904", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [363.6759948730469, 388.65081787109375, 384.66632080078125, 399.93951416015625], "spans": [[3, 5]], "text": "0.927", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.6180114746094, 388.5970153808594, 418.7779846191406, 400.0022888183594], "spans": [[3, 6]], "text": "0.853", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [439.5270080566406, 388.5970153808594, 458.38336181640625, 400.0022888183594], "spans": [[3, 7]], "text": "1.97", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": null, "spans": [[4, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [245.17599487304688, 349.3988342285156, 272.9449462890625, 373.6385192871094], "spans": [[4, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.0169982910156, 362.3498229980469, 310.00732421875, 373.6385192871094], "spans": [[4, 3]], "text": "0.923", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [326.7170104980469, 349.3988342285156, 347.70733642578125, 386.988525390625], "spans": [[4, 4]], "text": "0.909 0.897 0.901", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [362.0880126953125, 362.3498229980469, 386.24798583984375, 387.0513000488281], "spans": [[4, 
5]], "text": "0.938 0.915", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [396.20599365234375, 375.6998291015625, 417.1963195800781, 386.988525390625], "spans": [[4, 6]], "text": "0.843", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [440.7669982910156, 375.6998291015625, 457.150390625, 386.988525390625], "spans": [[4, 7]], "text": "3.77", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [161.906005859375, 356.8708190917969, 166.51473999023438, 368.1595153808594], "spans": [[5, 0]], "text": "2", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [209.50900268554688, 356.8708190917969, 214.11773681640625, 368.1595153808594], "spans": [[5, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [289.0169982910156, 349.3988342285156, 310.00732421875, 360.6875305175781], "spans": [[5, 3]], "text": "0.945", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [362.0880126953125, 349.34503173828125, 386.24798583984375, 360.75030517578125], "spans": [[5, 5]], "text": "0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.6180114746094, 349.3988342285156, 418.7779846191406, 
373.7012939453125], "spans": [[5, 6]], "text": "0.859 0.834", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [439.5270080566406, 349.3988342285156, 458.38336181640625, 373.7012939453125], "spans": [[5, 7]], "text": "1.91 3.81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.906005859375, 330.5688171386719, 166.51473999023438, 341.8575134277344], "spans": [[6, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [209.50900268554688, 330.5688171386719, 214.11773681640625, 341.8575134277344], "spans": [[6, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [245.17599487304688, 323.0968322753906, 272.9449462890625, 347.3375244140625], "spans": [[6, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [289.0169982910156, 323.0968322753906, 310.00732421875, 347.3375244140625], "spans": [[6, 3]], "text": "0.952 0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [326.7170104980469, 323.0968322753906, 347.70733642578125, 347.3375244140625], "spans": [[6, 4]], "text": "0.92 0.903", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [362.0880126953125, 323.0968322753906, 386.24798583984375, 347.4002990722656], "spans": [[6, 5]], "text": "0.942 0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [394.6180114746094, 323.0968322753906, 418.7779846191406, 347.4002990722656], "spans": [[6, 
6]], "text": "0.857 0.824", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [439.5270080566406, 323.0968322753906, 458.38336181640625, 347.4002990722656], "spans": [[6, 7]], "text": "1.22 2", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/2305.03393v1.doctags.txt b/tests/data/2305.03393v1.doctags.txt new file mode 100644 index 00000000..cca51d58 --- /dev/null +++ b/tests/data/2305.03393v1.doctags.txt @@ -0,0 +1,149 @@ + +Optimized Table Tokenization for Table Structure Recognition +Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, and Peter Staar [0000 - 0002 - 8088 - 0823] +IBM Research {mly,ahn,nli,cau,taa}@zurich.ibm.com +Abstract. Extracting tables from documents is a crucial task in any document conversion pipeline. Recently, transformer-based models have demonstrated that table-structure can be recognized with impressive accuracy using Image-to-Markup-Sequence (Im2Seq) approaches. Taking only the image of a table, such models predict a sequence of tokens (e.g. in HTML, LaTeX) which represent the structure of the table. Since the token representation of the table structure has a significant impact on the accuracy and run-time performance of any Im2Seq model, we investigate in this paper how table-structure representation can be optimised. We propose a new, optimised table-structure language (OTSL) with a minimized vocabulary and specific rules. 
The benefits of OTSL are that it reduces the number of tokens to 5 (HTML needs 28+) and shortens the sequence length to half of HTML on average. Consequently, model accuracy improves significantly, inference time is halved compared to HTML-based models, and the predicted table structures are always syntactically correct. This in turn eliminates most post-processing needs. Popular table structure data-sets will be published in OTSL format to the community. +Keywords: Table Structure Recognition · Data Representation · Transformers · Optimization. +1 Introduction +Tables are ubiquitous in documents such as scientific papers, patents, reports, manuals, specification sheets or marketing material. They often encode highly valuable information and therefore need to be extracted with high accuracy. Unfortunately, tables appear in documents in various sizes, styling and structure, making it difficult to recover their correct structure with simple analytical methods. Therefore, accurate table extraction is achieved these days with machine-learning based methods. +In modern document understanding systems [1,15], table extraction is typically a two-step process. Firstly, every table on a page is located with a bounding box, and secondly, their logical row and column structure is recognized. As of +Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL). +
+ +Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL). +
+today, table detection in documents is a well understood problem, and the latest state-of-the-art (SOTA) object detection methods provide an accuracy comparable to human observers [7,8,10,14,23]. On the other hand, the problem of table structure recognition (TSR) is a lot more challenging and remains a very active area of research, in which many novel machine learning algorithms are being explored [3,4,5,9,11,12,13,14,17,18,21,22]. +Recently emerging SOTA methods for table structure recognition employ transformer-based models, in which an image of the table is provided to the network in order to predict the structure of the table as a sequence of tokens. These image-to-sequence (Im2Seq) models are extremely powerful, since they allow for a purely data-driven solution. The tokens of the sequence typically belong to a markup language such as HTML, Latex or Markdown, which allow to describe table structure as rows, columns and spanning cells in various configurations. In Figure 1, we illustrate how HTML is used to represent the table-structure of a particular example table. Public table-structure data sets such as PubTabNet [22], and FinTabNet [21], which were created in a semi-automated way from paired PDF and HTML sources (e.g. PubMed Central), popularized primarily the use of HTML as ground-truth representation format for TSR. +While the majority of research in TSR is currently focused on the development and application of novel neural model architectures, the table structure representation language (e.g. HTML in PubTabNet and FinTabNet) is usually adopted as is for the sequence tokenization in Im2Seq models. In this paper, we aim for the opposite and investigate the impact of the table structure representation language with an otherwise unmodified Im2Seq transformer-based architecture. Since the current state-of-the-art Im2Seq model is TableFormer [9], we select this model to perform our experiments. 
+The main contribution of this paper is the introduction of a new optimised table structure language (OTSL), specifically designed to describe table-structure in an compact and structured way for Im2Seq models. OTSL has a number of key features, which make it very attractive to use in Im2Seq models. Specifically, compared to other languages such as HTML, OTSL has a minimized vocabulary which yields short sequence length, strong inherent structure (e.g. strict rectangular layout) and a strict syntax with rules that only look backwards. The latter allows for syntax validation during inference and ensures a syntactically correct table-structure. These OTSL features are illustrated in Figure 1, in comparison to HTML. +The paper is structured as follows. In section 2, we give an overview of the latest developments in table-structure reconstruction. In section 3 we review the current HTML table encoding (popularised by PubTabNet and FinTabNet) and discuss its flaws. Subsequently, we introduce OTSL in section 4, which includes the language definition, syntax rules and error-correction procedures. In section 5, we apply OTSL on the TableFormer architecture, compare it to TableFormer models trained on HTML and ultimately demonstrate the advantages of using OTSL. Finally, in section 6 we conclude our work and outline next potential steps. +2 Related Work +Approaches to formalize the logical structure and layout of tables in electronic documents date back more than two decades [16]. In the recent past, a wide variety of computer vision methods have been explored to tackle the problem of table structure recognition, i.e. the correct identification of columns, rows and spanning cells in a given table. Broadly speaking, the current deeplearning based approaches fall into three categories: object detection (OD) methods, Graph-Neural-Network (GNN) methods and Image-to-Markup-Sequence (Im2Seq) methods. 
Object-detection based methods [11,12,13,14,21] rely on tablestructure annotation using (overlapping) bounding boxes for training, and produce bounding-box predictions to define table cells, rows, and columns on a table image. Graph Neural Network (GNN) based methods [3,6,17,18], as the name suggests, represent tables as graph structures. The graph nodes represent the content of each table cell, an embedding vector from the table image, or geometric coordinates of the table cell. The edges of the graph define the relationship between the nodes, e.g. if they belong to the same column, row, or table cell. +Other work [20] aims at predicting a grid for each table and deciding which cells must be merged using an attention network. Im2Seq methods cast the problem as a sequence generation task [4,5,9,22], and therefore need an internal tablestructure representation language, which is often implemented with standard markup languages (e.g. HTML, LaTeX, Markdown). In theory, Im2Seq methods have a natural advantage over the OD and GNN methods by virtue of directly predicting the table-structure. As such, no post-processing or rules are needed in order to obtain the table-structure, which is necessary with OD and GNN approaches. In practice, this is not entirely true, because a predicted sequence of table-structure markup does not necessarily have to be syntactically correct. Hence, depending on the quality of the predicted sequence, some post-processing needs to be performed to ensure a syntactically valid (let alone correct) sequence. +Within the Im2Seq method, we find several popular models, namely the encoder-dual-decoder model (EDD) [22], TableFormer [9], Tabsplitter[2] and Ye et. al. [19]. EDD uses two consecutive long short-term memory (LSTM) decoders to predict a table in HTML representation. The tag decoder predicts a sequence of HTML tags. 
For each decoded table cell (<td>), the attention is passed to the cell decoder to predict the content with an embedded OCR approach. The latter makes it susceptible to transcription errors in the cell content of the table. TableFormer address this reliance on OCR and uses two transformer decoders for HTML structure and cell bounding box prediction in an end-to-end architecture. The predicted cell bounding box is then used to extract text tokens from an originating (digital) PDF page, circumventing any need for OCR. TabSplitter [2] proposes a compact double-matrix representation of table rows and columns to do error detection and error correction of HTML structure sequences based on predictions from [19]. This compact double-matrix representation can not be used directly by the Img2seq model training, so the model uses HTML as an intermediate form. Chi et. al. [4] introduce a data set and a baseline method using bidirectional LSTMs to predict LaTeX code. Kayal [5] introduces Gated ResNet transformers to predict LaTeX code, and a separate OCR module to extract content. +Im2Seq approaches have shown to be well-suited for the TSR task and allow a full end-to-end network design that can output the final table structure without pre- or post-processing logic. Furthermore, Im2Seq models have demonstrated to deliver state-of-the-art prediction accuracy [9]. This motivated the authors to investigate if the performance (both in accuracy and inference time) can be further improved by optimising the table structure representation language. We believe this is a necessary step before further improving neural network architectures for this task. +3 Problem Statement +All known Im2Seq based models for TSR fundamentally work in similar ways. Given an image of a table, the Im2Seq model predicts the structure of the table by generating a sequence of tokens. These tokens originate from a finite vocab- +ulary and can be interpreted as a table structure. For example, with the HTML tokens <table>,
</table>, <tr>, </tr>, <td> and </td>, one can construct simple table structures without any spanning cells. In reality though, one needs at least 28 HTML tokens to describe the most common complex tables observed in real-world documents [21,22], due to a variety of spanning cells definitions in the HTML token vocabulary.
+Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet. +
+ +Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet. +
+Obviously, HTML and other general-purpose markup languages were not designed for Im2Seq models. As such, they have some serious drawbacks. First, the token vocabulary needs to be artificially large in order to describe all plausible tabular structures. Since most Im2Seq models use an autoregressive approach, they generate the sequence token by token. Therefore, to reduce inference time, a shorter sequence length is critical. Every table-cell is represented by at least two tokens (<td> and </td>). Furthermore, when tokenizing the HTML structure, one needs to explicitly enumerate possible column-spans and row-spans as words. In practice, this ends up requiring 28 different HTML tokens (when including column- and row-spans up to 10 cells) just to describe every table in the PubTabNet dataset. Clearly, not every token is equally represented, as is depicted in Figure 2. This skewed distribution of tokens in combination with variable token row-length makes it challenging for models to learn the HTML structure. +Additionally, it would be desirable if the representation would easily allow an early detection of invalid sequences on-the-go, before the prediction of the entire table structure is completed. HTML is not well-suited for this purpose as the verification of incomplete sequences is non-trivial or even impossible. +In a valid HTML table, the token sequence must describe a 2D grid of table cells, serialised in row-major ordering, where each row and each column have the same length (while considering row- and column-spans). Furthermore, every opening tag in HTML needs to be matched by a closing tag in a correct hierarchical manner. Since the number of tokens for each table row and column can vary significantly, especially for large tables with many row- and column-spans, it is complex to verify the consistency of predicted structures during sequence +generation. 
Implicitly, this also means that Im2Seq models need to learn these complex syntax rules, simply to deliver valid output. +In practice, we observe two major issues with prediction quality when training Im2Seq models on HTML table structure generation from images. On the one hand, we find that on large tables, the visual attention of the model often starts to drift and is not accurately moving forward cell by cell anymore. This manifests itself either in an increasing location drift for proposed table-cells in later rows on the same column or even complete loss of vertical alignment, as illustrated in Figure 5. Addressing this with post-processing is partially possible, but clearly undesired. On the other hand, we find many instances of predictions with structural inconsistencies or plain invalid HTML output, as shown in Figure 6, which are nearly impossible to properly correct. Both problems seriously impact the TSR model performance, since they reflect not only in the task of pure structure recognition but also in the equally crucial recognition or matching of table cell content. +4 Optimised Table Structure Language +To mitigate the issues with HTML in Im2Seq-based TSR models laid out before, we propose here our Optimised Table Structure Language (OTSL). OTSL is designed to express table structure with a minimized vocabulary and a simple set of rules, which are both significantly reduced compared to HTML. At the same time, OTSL enables easy error detection and correction during sequence generation. We further demonstrate how the compact structure representation and minimized sequence length improves prediction accuracy and inference time in the TableFormer architecture. +4.1 Language Definition +In Figure 3, we illustrate how the OTSL is defined. In essence, the OTSL defines only 5 tokens that directly describe a tabular structure based on an atomic 2D grid. 
+The OTSL vocabulary is comprised of the following tokens: +-"C" cell a new table cell that either has or does not have cell content +-"L" cell left-looking cell , merging with the left neighbor cell to create a span +-"U" cell up-looking cell , merging with the upper neighbor cell to create a span +-"X" cell cross cell , to merge with both left and upper neighbor cells +-"NL" new-line , switch to the next row. +A notable attribute of OTSL is that it has the capability of achieving lossless conversion to HTML. +Fig. 3. OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding +
+ +Fig. 3. OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding +
+4.2 Language Syntax +The OTSL representation follows these syntax rules: +1. Left-looking cell rule : The left neighbour of an "L" cell must be either another "L" cell or a "C" cell. +2. Up-looking cell rule : The upper neighbour of a "U" cell must be either another "U" cell or a "C" cell. +3. Cross cell rule : +: The left neighbour of an "X" cell must be either another "X" cell or a "U" cell, and the upper neighbour of an "X" cell must be either another "X" cell or an "L" cell. +4. First row rule : Only "L" cells and "C" cells are allowed in the first row. +5. First column rule : Only "U" cells and "C" cells are allowed in the first column. +6. Rectangular rule : The table representation is always rectangular - all rows must have an equal number of tokens, terminated with "NL" token. +The application of these rules gives OTSL a set of unique properties. First of all, the OTSL enforces a strictly rectangular structure representation, where every new-line token starts a new row. As a consequence, all rows and all columns have exactly the same number of tokens, irrespective of cell spans. Secondly, the OTSL representation is unambiguous: Every table structure is represented in one way. In this representation every table cell corresponds to a "C"-cell token, which in case of spans is always located in the top-left corner of the table cell definition. Third, OTSL syntax rules are only backward-looking. As a consequence, every predicted token can be validated straight during sequence generation by looking at the previously predicted sequence. As such, OTSL can guarantee that every predicted sequence is syntactically valid. +These characteristics can be easily learned by sequence generator networks, as we demonstrate further below. We find strong indications that this pattern +reduces significantly the column drift seen in the HTML based models (see Figure 5). 
+4.3 Error-detection and -mitigation +The design of OTSL allows to validate a table structure easily on an unfinished sequence. The detection of an invalid sequence token is a clear indication of a prediction mistake, however a valid sequence by itself does not guarantee prediction correctness. Different heuristics can be used to correct token errors in an invalid sequence and thus increase the chances for accurate predictions. Such heuristics can be applied either after the prediction of each token, or at the end on the entire predicted sequence. For example a simple heuristic which can correct the predicted OTSL sequence on-the-fly is to verify if the token with the highest prediction confidence invalidates the predicted sequence, and replace it by the token with the next highest confidence until OTSL rules are satisfied. +5 Experiments +To evaluate the impact of OTSL on prediction accuracy and inference times, we conducted a series of experiments based on the TableFormer model (Figure 4) with two objectives: Firstly we evaluate the prediction quality and performance of OTSL vs. HTML after performing Hyper Parameter Optimization (HPO) on the canonical PubTabNet data set. Secondly we pick the best hyper-parameters found in the first step and evaluate how OTSL impacts the performance of TableFormer after training on other publicly available data sets (FinTabNet, PubTables-1M [14]). The ground truth (GT) from all data sets has been converted into OTSL format for this purpose, and will be made publicly available. +Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach. +
+ +Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach. +
+We rely on standard metrics such as Tree Edit Distance score (TEDs) for table structure prediction, and Mean Average Precision (mAP) with 0.75 Intersection Over Union (IOU) threshold for the bounding-box predictions of table cells. The predicted OTSL structures were converted back to HTML format in +order to compute the TED score. Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz. +5.1 Hyper Parameter Optimization +We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML. +Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart. + + + +##LanguageTEDsTEDsTEDsmAPInference +enc-layersdec-layersLanguagesimplecomplexall(0.75)time (secs) +66OTSL HTML0.965 0.9690.934 0.9270.955 0.9550.88 0.8572.73 5.39 +44OTSL HTML0.938 0.9520.904 0.9090.9270.8531.97 +24OTSL HTML0.9230.897 0.9010.938 0.9150.8433.77 +0.9450.9310.859 0.8341.91 3.81 +42OTSL HTML0.952 0.9440.92 0.9030.942 0.9310.857 0.8241.22 2 +
Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.
+5.2 Quantitative Results +We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables. +Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation. +Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8). + + + +LanguageTEDsTEDsTEDsmAP(0.75)Inference time (secs) +LanguagesimplecomplexallmAP(0.75)Inference time (secs) +PubTabNetOTSL0.9650.9340.9550.882.73 +PubTabNetHTML0.9690.9270.9550.8575.39 +FinTabNetOTSL0.9550.9610.9590.8621.85 +FinTabNetHTML0.9170.9220.920.7223.26 +PubTables-1MOTSL0.9870.9640.9770.8961.79 +PubTables-1MHTML0.9830.9440.9660.8893.26 +
Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8).
+5.3 Qualitative Results +To illustrate the qualitative differences between OTSL and HTML, Figure 5 demonstrates less overlap and more accurate bounding boxes with OTSL. In Figure 6, OTSL proves to be more effective in handling tables with longer token sequences, resulting in even more precise structure prediction and bounding boxes. +Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). "PMC2807444_006_00.png" PubTabNet. μ +
+ +Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). "PMC2807444_006_00.png" PubTabNet. μ +
+μ + +Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. "PMC5406406_003_01.png" PubTabNet. +
+ +Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. "PMC5406406_003_01.png" PubTabNet. +
+6 Conclusion +We demonstrated that representing tables in HTML for the task of table structure recognition with Im2Seq models is ill-suited and has serious limitations. Furthermore, we presented in this paper an Optimized Table Structure Language (OTSL) which, when compared to commonly used general purpose languages, has several key benefits. +First and foremost, given the same network configuration, inference time for a table-structure prediction is about 2 times faster compared to the conventional HTML approach. This is primarily owed to the shorter sequence length of the OTSL representation. Additional performance benefits can be obtained with HPO (hyper parameter optimization). As we demonstrate in our experiments, models trained on OTSL can be significantly smaller, e.g. by reducing the number of encoder and decoder layers, while preserving comparatively good prediction quality. This can further improve inference performance, yielding 5-6 times faster inference speed in OTSL with prediction quality comparable to models trained on HTML (see Table 1). +Secondly, OTSL has more inherent structure and a significantly restricted vocabulary size. This allows autoregressive models to perform better in the TED metric, but especially with regards to prediction accuracy of the table-cell bounding boxes (see Table 2). As shown in Figure 5, we observe that the OTSL drastically reduces the drift for table cell bounding boxes at high row count and in sparse tables. This leads to more accurate predictions and a significant reduction in post-processing complexity, which is an undesired necessity in HTML-based Im2Seq models. Significant novelty lies in OTSL syntactical rules, which are few, simple and always backwards looking. Each new token can be validated only by analyzing the sequence of previous tokens, without requiring the entire sequence to detect mistakes. This in return allows to perform structural error detection and correction on-the-fly during sequence generation. 
+References +1. Auer, C., Dolfi, M., Carvalho, A., Ramis, C.B., Staar, P.W.J.: Delivering document conversion as a cloud service with high throughput and responsiveness. CoRR abs/2206.00785 (2022). https://doi.org/10.48550/arXiv.2206.00785 , https://doi.org/10.48550/arXiv.2206.00785 +2. Chen, B., Peng, D., Zhang, J., Ren, Y., Jin, L.: Complex table structure recognition in the wild using transformer and identity matrix-based augmentation. In: Porwal, U., Fornés, A., Shafait, F. (eds.) Frontiers in Handwriting Recognition. pp. 545-561. Springer International Publishing, Cham (2022) +3. Chi, Z., Huang, H., Xu, H.D., Yu, H., Yin, W., Mao, X.L.: Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019) +4. Deng, Y., Rosenberg, D., Mann, G.: Challenges in end-to-end neural scientific table recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 894-901. IEEE (2019) +5. Kayal, P., Anand, M., Desai, H., Singh, M.: Tables to latex: structure and content extraction from scientific tables. International Journal on Document Analysis and Recognition (IJDAR) pp. 1-10 (2022) +6. Lee, E., Kwon, J., Yang, H., Park, J., Lee, S., Koo, H.I., Cho, N.I.: Table structure recognition based on grid shape graph. In: 2022 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). pp. 1868-1873. IEEE (2022) +7. Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: A benchmark dataset for table detection and recognition (2019) +8. Livathinos, N., Berrospi, C., Lysak, M., Kuropiatnyk, V., Nassar, A., Carvalho, A., Dolfi, M., Auer, C., Dinkla, K., Staar, P.: Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence 35 (17), 15137-15145 (May 2021), https://ojs.aaai.org/index.php/AAAI/article/view/17777 +9. 
Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: Table structure understanding with transformers. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4614-4623 (June 2022) +10. Pfitzmann, B., Auer, C., Dolfi, M., Nassar, A.S., Staar, P.W.J.: Doclaynet: A large human-annotated dataset for document-layout segmentation. In: Zhang, A., Rangwala, H. (eds.) KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 14 - 18, 2022. pp. 3743-3751. ACM (2022). https://doi.org/10.1145/3534678.3539043 , https:// doi.org/10.1145/3534678.3539043 +11. Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from imagebased documents. In: Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops. pp. 572-573 (2020) +12. Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162-1167. IEEE (2017) +13. Siddiqui, S.A., Fateh, I.A., Rizvi, S.T.R., Dengel, A., Ahmed, S.: Deeptabstr: Deep learning based table structure recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1403-1409 (2019). https:// doi.org/10.1109/ICDAR.2019.00226 +14. Smock, B., Pesala, R., Abraham, R.: PubTables-1M: Towards comprehensive table extraction from unstructured documents. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4634-4642 (June 2022) +15. Staar, P.W.J., Dolfi, M., Auer, C., Bekas, C.: Corpus conversion service: A machine learning platform to ingest documents at scale. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. pp. 774-782. 
KDD '18, Association for Computing Machinery, New York, NY, USA (2018). https://doi.org/10.1145/3219819.3219834 , https://doi.org/10. 1145/3219819.3219834 +16. Wang, X.: Tabular Abstraction, Editing, and Formatting. Ph.D. thesis, CAN (1996), aAINN09397 +17. Xue, W., Li, Q., Tao, D.: Res2tim: Reconstruct syntactic structures from table images. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 749-755. IEEE (2019) +18. Xue, W., Yu, B., Wang, W., Tao, D., Li, Q.: Tgrnet: A table graph reconstruction network for table structure recognition. In: Proceedings of the IEEE/CVF International Conference on Computer Vision. pp. 1295-1304 (2021) +19. Ye, J., Qi, X., He, Y., Chen, Y., Gu, D., Gao, P., Xiao, R.: Pingan-vcgroup's solution for icdar 2021 competition on scientific literature parsing task b: Table recognition to html (2021). https://doi.org/10.48550/ARXIV.2105.01848 , https://arxiv.org/abs/2105.01848 +20. Zhang, Z., Zhang, J., Du, J., Wang, F.: Split, embed and merge: An accurate table structure recognizer. Pattern Recognition 126 , 108565 (2022) +21. Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV). pp. 697-706 (2021). https://doi.org/10.1109/WACV48630.2021. 00074 +22. Zhong, X., ShafieiBavani, E., Jimeno Yepes, A.: Image-based table recognition: Data, model, and evaluation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) Computer Vision - ECCV 2020. pp. 564-580. Springer International Publishing, Cham (2020) +23. Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (2019) +
\ No newline at end of file diff --git a/tests/data/2305.03393v1.json b/tests/data/2305.03393v1.json index a547d423..e3174b78 100644 --- a/tests/data/2305.03393v1.json +++ b/tests/data/2305.03393v1.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2305.03393v1.pdf", "filename-prov": null, "document-hash": "62f2a2163d768d5b125a207967797aefa6c9cc113de8bb5c725c582595dd0c1d", "#-pages": 14, "collection-name": null, "description": null, "page-hashes": [{"hash": "7d7ef24bf2a048bcc229d37583b737ee85f67a02864236764abcaca9eabc8b68", "model": "default", "page": 1}, {"hash": "45bd6ad4d3e145029fa89fbf741a81d8885eb87ef03d6744221c61e66358451b", "model": "default", "page": 2}, {"hash": "69656f07bd8fb7afc53ab6f3d0e9153a337b550522493bf18d702c8406a9c545", "model": "default", "page": 3}, {"hash": "5afca9340c5bda646a75b8c2a1bde1b8f7b89e08a64a3cc4732fd11c1c6ead48", "model": "default", "page": 4}, {"hash": "d3b9daa8fd5d091fb5ef9bce44f085dd282a137e215574fec9556904b25cea8a", "model": "default", "page": 5}, {"hash": "eaaaaebf96b567c9bd5696b2dd4d747b3b3ad40e15ca8dc8968c56060315f228", "model": "default", "page": 6}, {"hash": "d786b8d564d7a7c122f2cf573f0cc1f11ea0a559d93f19cf020c11360bce00b4", "model": "default", "page": 7}, {"hash": "839d5ba3f9d079e8b42470002e4d7cb9ac60681cd9e2f2e3bf41afa6884a170e", "model": "default", "page": 8}, {"hash": "d50e5f3b8b4d1d5b04d5b253b187da6f40784bee5bf36b7eaefcabbc89e7b7a9", "model": "default", "page": 9}, {"hash": "a1509c4093fe25dbcb07c87f394506182323289a17dd189679c0b6d8238c5aae", "model": "default", "page": 
10}, {"hash": "ac5ff01e648170bbe641d6fd95dc4f952a8e0bf62308f109b7c49678cef97005", "model": "default", "page": 11}, {"hash": "6a9aa589dc4faead43b032ec733af0c4a6fedfa834aa56b1bfefc7458ea949cc", "model": "default", "page": 12}, {"hash": "467ed0563b555b6fd2a0bd2e4a7bf596c066b8f08d2e1fd33f6c6d8b1c445759", "model": "default", "page": 13}, {"hash": "435efd2ece1dfed60a8dcc1f7fd72dde2cb58c59f5aebc4d5ae2227510195b42", "model": "default", "page": 14}]}, "main-text": [{"text": "arXiv:2305.03393v1 [cs.CV] 5 May 2023", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [16.329214096069336, 236.99996948242188, 36.6031608581543, 582.52001953125], "page": 1, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.61328125, 644.6187133789062, 480.59735107421875, 676.8052978515625], "page": 1, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, and Peter Staar [0000 - 0002 - 8088 - 0823]", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [138.6561737060547, 587.6192626953125, 476.05718994140625, 623.0816650390625], "page": 1, "span": [0, 238], "__ref_s3_data": null}]}, {"text": "IBM Research {mly,ahn,nli,cau,taa}@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [222.96609497070312, 555.623046875, 392.69110107421875, 575.94482421875], "page": 1, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "Abstract. Extracting tables from documents is a crucial task in any document conversion pipeline. Recently, transformer-based models have demonstrated that table-structure can be recognized with impressive accuracy using Image-to-Markup-Sequence (Im2Seq) approaches. 
Taking only the image of a table, such models predict a sequence of tokens (e.g. in HTML, LaTeX) which represent the structure of the table. Since the token representation of the table structure has a significant impact on the accuracy and run-time performance of any Im2Seq model, we investigate in this paper how table-structure representation can be optimised. We propose a new, optimised table-structure language (OTSL) with a minimized vocabulary and specific rules. The benefits of OTSL are that it reduces the number of tokens to 5 (HTML needs 28+) and shortens the sequence length to half of HTML on average. Consequently, model accuracy improves significantly, inference time is halved compared to HTML-based models, and the predicted table structures are always syntactically correct. This in turn eliminates most post-processing needs. Popular table structure data-sets will be published in OTSL format to the community.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [162.13674926757812, 327.2655334472656, 452.4198913574219, 522.533447265625], "page": 1, "span": [0, 1198], "__ref_s3_data": null}]}, {"text": "Keywords: Table Structure Recognition \u00b7 Data Representation \u00b7 Transformers \u00b7 Optimization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [162.6794891357422, 293.8035888671875, 452.2415771484375, 314.24090576171875], "page": 1, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "1 Introduction", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.76512145996094, 259.3119201660156, 228.933837890625, 270.5150451660156], "page": 1, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "Tables are ubiquitous in documents such as scientific papers, patents, reports, manuals, specification sheets or marketing material. They often encode highly valuable information and therefore need to be extracted with high accuracy. 
Unfortunately, tables appear in documents in various sizes, styling and structure, making it difficult to recover their correct structure with simple analytical methods. Therefore, accurate table extraction is achieved these days with machine-learning based methods.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.01023864746094, 163.12771606445312, 480.595947265625, 244.2879638671875], "page": 1, "span": [0, 500], "__ref_s3_data": null}]}, {"text": "In modern document understanding systems [1,15], table extraction is typically a two-step process. Firstly, every table on a page is located with a bounding box, and secondly, their logical row and column structure is recognized. As of", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.044189453125, 126.84117889404297, 480.74835205078125, 160.30677795410156], "page": 1, "span": [0, 235], "__ref_s3_data": null}]}, {"text": "2", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.28973388671875, 690.1593017578125, 139.494384765625, 698.4556884765625], "page": 2, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [167.312744140625, 689.8800048828125, 231.72227478027344, 699.0272827148438], "page": 2, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. 
This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.99227905273438, 591.5379028320312, 480.7561950683594, 666.4251098632812], "page": 2, "span": [0, 574], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"text": "today, table detection in documents is a well understood problem, and the latest state-of-the-art (SOTA) object detection methods provide an accuracy comparable to human observers [7,8,10,14,23]. On the other hand, the problem of table structure recognition (TSR) is a lot more challenging and remains a very active area of research, in which many novel machine learning algorithms are being explored [3,4,5,9,11,12,13,14,17,18,21,22].", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.9597930908203, 270.46295166015625, 480.5923156738281, 340.515380859375], "page": 2, "span": [0, 435], "__ref_s3_data": null}]}, {"text": "Recently emerging SOTA methods for table structure recognition employ transformer-based models, in which an image of the table is provided to the network in order to predict the structure of the table as a sequence of tokens. These image-to-sequence (Im2Seq) models are extremely powerful, since they allow for a purely data-driven solution. The tokens of the sequence typically belong to a markup language such as HTML, Latex or Markdown, which allow to describe table structure as rows, columns and spanning cells in various configurations. In Figure 1, we illustrate how HTML is used to represent the table-structure of a particular example table. 
Public table-structure data sets such as PubTabNet [22], and FinTabNet [21], which were created in a semi-automated way from paired PDF and HTML sources (e.g. PubMed Central), popularized primarily the use of HTML as ground-truth representation format for TSR.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.86209106445312, 126.80567932128906, 480.5948181152344, 268.64990234375], "page": 2, "span": [0, 911], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [194.0343780517578, 689.6653442382812, 447.54290771484375, 698.948486328125], "page": 3, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "3", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [474.95513916015625, 690.1593017578125, 480.59124755859375, 698.3677978515625], "page": 3, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "While the majority of research in TSR is currently focused on the development and application of novel neural model architectures, the table structure representation language (e.g. HTML in PubTabNet and FinTabNet) is usually adopted as is for the sequence tokenization in Im2Seq models. In this paper, we aim for the opposite and investigate the impact of the table structure representation language with an otherwise unmodified Im2Seq transformer-based architecture. Since the current state-of-the-art Im2Seq model is TableFormer [9], we select this model to perform our experiments.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.981201171875, 579.9556884765625, 480.7418212890625, 673.815185546875], "page": 3, "span": [0, 584], "__ref_s3_data": null}]}, {"text": "The main contribution of this paper is the introduction of a new optimised table structure language (OTSL), specifically designed to describe table-structure in an compact and structured way for Im2Seq models. 
OTSL has a number of key features, which make it very attractive to use in Im2Seq models. Specifically, compared to other languages such as HTML, OTSL has a minimized vocabulary which yields short sequence length, strong inherent structure (e.g. strict rectangular layout) and a strict syntax with rules that only look backwards. The latter allows for syntax validation during inference and ensures a syntactically correct table-structure. These OTSL features are illustrated in Figure 1, in comparison to HTML.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.7724151611328, 460.7701416015625, 480.87481689453125, 577.6600341796875], "page": 3, "span": [0, 721], "__ref_s3_data": null}]}, {"text": "The paper is structured as follows. In section 2, we give an overview of the latest developments in table-structure reconstruction. In section 3 we review the current HTML table encoding (popularised by PubTabNet and FinTabNet) and discuss its flaws. Subsequently, we introduce OTSL in section 4, which includes the language definition, syntax rules and error-correction procedures. In section 5, we apply OTSL on the TableFormer architecture, compare it to TableFormer models trained on HTML and ultimately demonstrate the advantages of using OTSL. Finally, in section 6 we conclude our work and outline next potential steps.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.7509765625, 352.1451110839844, 480.6080017089844, 458.64886474609375], "page": 3, "span": [0, 626], "__ref_s3_data": null}]}, {"text": "2 Related Work", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.4993896484375, 319.3436584472656, 236.76913452148438, 330.5750732421875], "page": 3, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "Approaches to formalize the logical structure and layout of tables in electronic documents date back more than two decades [16]. 
In the recent past, a wide variety of computer vision methods have been explored to tackle the problem of table structure recognition, i.e. the correct identification of columns, rows and spanning cells in a given table. Broadly speaking, the current deeplearning based approaches fall into three categories: object detection (OD) methods, Graph-Neural-Network (GNN) methods and Image-to-Markup-Sequence (Im2Seq) methods. Object-detection based methods [11,12,13,14,21] rely on tablestructure annotation using (overlapping) bounding boxes for training, and produce bounding-box predictions to define table cells, rows, and columns on a table image. Graph Neural Network (GNN) based methods [3,6,17,18], as the name suggests, represent tables as graph structures. The graph nodes represent the content of each table cell, an embedding vector from the table image, or geometric coordinates of the table cell. The edges of the graph define the relationship between the nodes, e.g. if they belong to the same column, row, or table cell.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.65347290039062, 126.65711212158203, 484.1204833984375, 304.6298522949219], "page": 3, "span": [0, 1161], "__ref_s3_data": null}]}, {"text": "4 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.52096557617188, 690.1593017578125, 231.72227478027344, 699.0346069335938], "page": 4, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "Other work [20] aims at predicting a grid for each table and deciding which cells must be merged using an attention network. Im2Seq methods cast the problem as a sequence generation task [4,5,9,22], and therefore need an internal tablestructure representation language, which is often implemented with standard markup languages (e.g. HTML, LaTeX, Markdown). In theory, Im2Seq methods have a natural advantage over the OD and GNN methods by virtue of directly predicting the table-structure. 
As such, no post-processing or rules are needed in order to obtain the table-structure, which is necessary with OD and GNN approaches. In practice, this is not entirely true, because a predicted sequence of table-structure markup does not necessarily have to be syntactically correct. Hence, depending on the quality of the predicted sequence, some post-processing needs to be performed to ensure a syntactically valid (let alone correct) sequence.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.7613983154297, 532.5480346679688, 480.6270446777344, 674.1491088867188], "page": 4, "span": [0, 939], "__ref_s3_data": null}]}, {"text": "Within the Im2Seq method, we find several popular models, namely the encoder-dual-decoder model (EDD) [22], TableFormer [9], Tabsplitter[2] and Ye et. al. [19]. EDD uses two consecutive long short-term memory (LSTM) decoders to predict a table in HTML representation. The tag decoder predicts a sequence of HTML tags. For each decoded table cell ( ), the attention is passed to the cell decoder to predict the content with an embedded OCR approach. The latter makes it susceptible to transcription errors in the cell content of the table. TableFormer address this reliance on OCR and uses two transformer decoders for HTML structure and cell bounding box prediction in an end-to-end architecture. The predicted cell bounding box is then used to extract text tokens from an originating (digital) PDF page, circumventing any need for OCR. TabSplitter [2] proposes a compact double-matrix representation of table rows and columns to do error detection and error correction of HTML structure sequences based on predictions from [19]. This compact double-matrix representation can not be used directly by the Img2seq model training, so the model uses HTML as an intermediate form. Chi et. al. [4] introduce a data set and a baseline method using bidirectional LSTMs to predict LaTeX code. 
Kayal [5] introduces Gated ResNet transformers to predict LaTeX code, and a separate OCR module to extract content.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.5825958251953, 305.3533020019531, 480.7930908203125, 530.6050415039062], "page": 4, "span": [0, 1404], "__ref_s3_data": null}]}, {"text": "Im2Seq approaches have shown to be well-suited for the TSR task and allow a full end-to-end network design that can output the final table structure without pre- or post-processing logic. Furthermore, Im2Seq models have demonstrated to deliver state-of-the-art prediction accuracy [9]. This motivated the authors to investigate if the performance (both in accuracy and inference time) can be further improved by optimising the table structure representation language. We believe this is a necessary step before further improving neural network architectures for this task.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.88829040527344, 209.4513397216797, 480.5937805175781, 303.2884216308594], "page": 4, "span": [0, 572], "__ref_s3_data": null}]}, {"text": "3 Problem Statement", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.42018127441406, 175.88177490234375, 269.6244201660156, 186.8051300048828], "page": 4, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "All known Im2Seq based models for TSR fundamentally work in similar ways. Given an image of a table, the Im2Seq model predicts the structure of the table by generating a sequence of tokens. 
These tokens originate from a finite vocab-", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.80313110351562, 126.69752502441406, 480.59368896484375, 160.46705627441406], "page": 4, "span": [0, 233], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [194.02210998535156, 689.8338623046875, 447.54290771484375, 698.9061889648438], "page": 5, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "5", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [475.1318664550781, 690.1593017578125, 480.59124755859375, 698.4717407226562], "page": 5, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "ulary and can be interpreted as a table structure. For example, with the HTML tokens ,
, , , and , one can construct simple table structures without any spanning cells. In reality though, one needs at least 28 HTML tokens to describe the most common complex tables observed in real-world documents [21,22], due to a variety of spanning cells definitions in the HTML token vocabulary.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.90025329589844, 604.4931640625, 480.7872619628906, 673.93798828125], "page": 5, "span": [0, 422], "__ref_s3_data": null}]}, {"text": "Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [145.19676208496094, 562.5794677734375, 469.7522277832031, 571.8128051757812], "page": 5, "span": [0, 73], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"text": "Obviously, HTML and other general-purpose markup languages were not designed for Im2Seq models. As such, they have some serious drawbacks. First, the token vocabulary needs to be artificially large in order to describe all plausible tabular structures. Since most Im2Seq models use an autoregressive approach, they generate the sequence token by token. Therefore, to reduce inference time, a shorter sequence length is critical. Every table-cell is represented by at least two tokens ( and ). Furthermore, when tokenizing the HTML structure, one needs to explicitly enumerate possible column-spans and row-spans as words. In practice, this ends up requiring 28 different HTML tokens (when including column- and row-spans up to 10 cells) just to describe every table in the PubTabNet dataset. Clearly, not every token is equally represented, as is depicted in Figure 2. 
This skewed distribution of tokens in combination with variable token row-length makes it challenging for models to learn the HTML structure.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.7060546875, 259.57940673828125, 480.62744140625, 424.87249755859375], "page": 5, "span": [0, 1021], "__ref_s3_data": null}]}, {"text": "Additionally, it would be desirable if the representation would easily allow an early detection of invalid sequences on-the-go, before the prediction of the entire table structure is completed. HTML is not well-suited for this purpose as the verification of incomplete sequences is non-trivial or even impossible.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.89939880371094, 210.46835327148438, 480.5928955078125, 257.10150146484375], "page": 5, "span": [0, 313], "__ref_s3_data": null}]}, {"text": "In a valid HTML table, the token sequence must describe a 2D grid of table cells, serialised in row-major ordering, where each row and each column have the same length (while considering row- and column-spans). Furthermore, every opening tag in HTML needs to be matched by a closing tag in a correct hierarchical manner. Since the number of tokens for each table row and column can vary significantly, especially for large tables with many row- and column-spans, it is complex to verify the consistency of predicted structures during sequence", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.75929260253906, 126.89654541015625, 480.5947265625, 208.89126586914062], "page": 5, "span": [0, 542], "__ref_s3_data": null}]}, {"text": "6", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.12826538085938, 690.1593017578125, 139.453125, 698.234130859375], "page": 6, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "M. 
Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [167.2993927001953, 690.0819091796875, 231.72227478027344, 698.99951171875], "page": 6, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "generation. Implicitly, this also means that Im2Seq models need to learn these complex syntax rules, simply to deliver valid output.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.94253540039062, 651.3041381835938, 480.59478759765625, 673.705078125], "page": 6, "span": [0, 132], "__ref_s3_data": null}]}, {"text": "In practice, we observe two major issues with prediction quality when training Im2Seq models on HTML table structure generation from images. On the one hand, we find that on large tables, the visual attention of the model often starts to drift and is not accurately moving forward cell by cell anymore. This manifests itself in either in an increasing location drift for proposed table-cells in later rows on the same column or even complete loss of vertical alignment, as illustrated in Figure 5. Addressing this with post-processing is partially possible, but clearly undesired. On the other hand, we find many instances of predictions with structural inconsistencies or plain invalid HTML output, as shown in Figure 6, which are nearly impossible to properly correct. 
Both problems seriously impact the TSR model performance, since they reflect not only in the task of pure structure recognition but also in the equally crucial recognition or matching of table cell content.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.64344787597656, 496.2580871582031, 480.595703125, 649.443603515625], "page": 6, "span": [0, 977], "__ref_s3_data": null}]}, {"text": "4 Optimised Table Structure Language", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.07444763183594, 460.4577331542969, 372.50848388671875, 472.3045959472656], "page": 6, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "To mitigate the issues with HTML in Im2Seq-based TSR models laid out before, we propose here our Optimised Table Structure Language (OTSL). OTSL is designed to express table structure with a minimized vocabulary and a simple set of rules, which are both significantly reduced compared to HTML. At the same time, OTSL enables easy error detection and correction during sequence generation. We further demonstrate how the compact structure representation and minimized sequence length improves prediction accuracy and inference time in the TableFormer architecture.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.82858276367188, 350.400146484375, 480.5947265625, 443.65216064453125], "page": 6, "span": [0, 563], "__ref_s3_data": null}]}, {"text": "4.1 Language Definition", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.0214385986328, 316.9593811035156, 261.80108642578125, 326.9925231933594], "page": 6, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "In Figure 3, we illustrate how the OTSL is defined. 
In essence, the OTSL defines only 5 tokens that directly describe a tabular structure based on an atomic 2D grid.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.03182983398438, 269.9826354980469, 480.5887145996094, 303.5955505371094], "page": 6, "span": [0, 165], "__ref_s3_data": null}]}, {"text": "The OTSL vocabulary is comprised of the following tokens:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [149.35653686523438, 256.95648193359375, 409.3113708496094, 266.98114013671875], "page": 6, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "-\"C\" cell a new table cell that either has or does not have cell content", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.9448699951172, 235.22317504882812, 460.54443359375, 245.30445861816406], "page": 6, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "-\"L\" cell left-looking cell , merging with the left neighbor cell to create a span", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.9716796875, 210.11834716796875, 480.59393310546875, 232.8718719482422], "page": 6, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "-\"U\" cell up-looking cell , merging with the upper neighbor cell to create a span", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [140.17970275878906, 184.99545288085938, 480.58856201171875, 207.94252014160156], "page": 6, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "-\"X\" cell cross cell , to merge with both left and upper neighbor cells", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.92364501953125, 172.88253784179688, 454.5549621582031, 183.41383361816406], "page": 6, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "-\"NL\" new-line , switch to the next row.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.87696838378906, 160.93917846679688, 328.61676025390625, 170.83633422851562], 
"page": 6, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "A notable attribute of OTSL is that it has the capability of achieving lossless conversion to HTML.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.19346618652344, 127.14515686035156, 480.5928039550781, 148.89442443847656], "page": 6, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [193.9747772216797, 689.7752685546875, 447.54290771484375, 698.8756103515625], "page": 7, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "7", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [475.3976135253906, 690.1593017578125, 480.59124755859375, 698.609375], "page": 7, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Fig. 3. OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [133.8881378173828, 635.6204833984375, 480.58740234375, 667.1154174804688], "page": 7, "span": [0, 207], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"text": "4.2 Language Syntax", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.2874298095703, 477.7056579589844, 246.78787231445312, 487.5195007324219], "page": 7, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "The OTSL representation follows these syntax rules:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.23097229003906, 457.80255126953125, 363.7961730957031, 467.56781005859375], "page": 7, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "1. 
Left-looking cell rule : The left neighbour of an \"L\" cell must be either another \"L\" cell or a \"C\" cell.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.97299194335938, 424.0662536621094, 480.5890197753906, 445.8700256347656], "page": 7, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "2. Up-looking cell rule : The upper neighbour of a \"U\" cell must be either another \"U\" cell or a \"C\" cell.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.19281005859375, 400.15325927734375, 480.59228515625, 421.95819091796875], "page": 7, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "3. Cross cell rule :", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [138.06527709960938, 388.19525146484375, 226.0736083984375, 397.4916687011719], "page": 7, "span": [0, 20], "__ref_s3_data": null}]}, {"text": ": The left neighbour of an \"X\" cell must be either another \"X\" cell or a \"U\" cell, and the upper neighbour of an \"X\" cell must be either another \"X\" cell or an \"L\" cell.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [146.40036010742188, 352.3262939453125, 480.5923767089844, 396.9922180175781], "page": 7, "span": [0, 169], "__ref_s3_data": null}]}, {"text": "4. First row rule : Only \"L\" cells and \"C\" cells are allowed in the first row.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.39491271972656, 339.79541015625, 474.5901794433594, 349.8867492675781], "page": 7, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "5. First column rule : Only \"U\" cells and \"C\" cells are allowed in the first column.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.3254852294922, 316.4543151855469, 480.58746337890625, 338.0946960449219], "page": 7, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "6. 
Rectangular rule : The table representation is always rectangular - all rows must have an equal number of tokens, terminated with \"NL\" token.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.22427368164062, 292.2819519042969, 480.5945739746094, 314.491455078125], "page": 7, "span": [0, 144], "__ref_s3_data": null}]}, {"text": "The application of these rules gives OTSL a set of unique properties. First of all, the OTSL enforces a strictly rectangular structure representation, where every new-line token starts a new row. As a consequence, all rows and all columns have exactly the same number of tokens, irrespective of cell spans. Secondly, the OTSL representation is unambiguous: Every table structure is represented in one way. In this representation every table cell corresponds to a \"C\"-cell token, which in case of spans is always located in the top-left corner of the table cell definition. Third, OTSL syntax rules are only backward-looking. As a consequence, every predicted token can be validated straight during sequence generation by looking at the previously predicted sequence. As such, OTSL can guarantee that every predicted sequence is syntactically valid.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.6158447265625, 149.74966430664062, 480.5958251953125, 280.5412292480469], "page": 7, "span": [0, 848], "__ref_s3_data": null}]}, {"text": "These characteristics can be easily learned by sequence generator networks, as we demonstrate further below. 
We find strong indications that this pattern", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.04405212402344, 126.91014099121094, 480.5926513671875, 148.8981170654297], "page": 7, "span": [0, 153], "__ref_s3_data": null}]}, {"text": "8", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.1900634765625, 690.1593017578125, 139.46353149414062, 698.3311767578125], "page": 8, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [167.40870666503906, 690.0598754882812, 231.72227478027344, 699.074462890625], "page": 8, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "reduces significantly the column drift seen in the HTML based models (see Figure 5).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.2002410888672, 651.7838745117188, 480.5888366699219, 673.7068481445312], "page": 8, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "4.3 Error-detection and -mitigation", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.25576782226562, 620.8721313476562, 319.3470764160156, 630.8031005859375], "page": 8, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "The design of OTSL allows to validate a table structure easily on an unfinished sequence. The detection of an invalid sequence token is a clear indication of a prediction mistake, however a valid sequence by itself does not guarantee prediction correctness. Different heuristics can be used to correct token errors in an invalid sequence and thus increase the chances for accurate predictions. Such heuristics can be applied either after the prediction of each token, or at the end on the entire predicted sequence. 
For example a simple heuristic which can correct the predicted OTSL sequence on-the-fly is to verify if the token with the highest prediction confidence invalidates the predicted sequence, and replace it by the token with the next highest confidence until OTSL rules are satisfied.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.90631103515625, 492.9853515625, 480.59576416015625, 610.5565185546875], "page": 8, "span": [0, 797], "__ref_s3_data": null}]}, {"text": "5 Experiments", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.63143920898438, 459.85089111328125, 229.03533935546875, 471.56646728515625], "page": 8, "span": [0, 13], "__ref_s3_data": null}]}, {"text": "To evaluate the impact of OTSL on prediction accuracy and inference times, we conducted a series of experiments based on the TableFormer model (Figure 4) with two objectives: Firstly we evaluate the prediction quality and performance of OTSL vs. HTML after performing Hyper Parameter Optimization (HPO) on the canonical PubTabNet data set. Secondly we pick the best hyper-parameters found in the first step and evaluate how OTSL impacts the performance of TableFormer after training on other publicly available data sets (FinTabNet, PubTables-1M [14]). The ground truth (GT) from all data sets has been converted into OTSL format for this purpose, and will be made publicly available.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.63893127441406, 339.67877197265625, 480.6024475097656, 445.8916015625], "page": 8, "span": [0, 684], "__ref_s3_data": null}]}, {"text": "Fig. 4. 
Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [134.0367889404297, 287.69140625, 480.5908203125, 308.2715148925781], "page": 8, "span": [0, 104], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"text": "We rely on standard metrics such as Tree Edit Distance score (TEDs) for table structure prediction, and Mean Average Precision (mAP) with 0.75 Intersection Over Union (IOU) threshold for the bounding-box predictions of table cells. The predicted OTSL structures were converted back to HTML format in", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.83853149414062, 126.85651397705078, 480.59173583984375, 172.45193481445312], "page": 8, "span": [0, 299], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [193.94395446777344, 689.7586669921875, 447.54290771484375, 698.8834228515625], "page": 9, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "9", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [474.9051818847656, 690.1593017578125, 480.59124755859375, 698.5001831054688], "page": 9, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "order to compute the TED score. 
Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.90585327148438, 640.3582153320312, 480.5957946777344, 673.7608642578125], "page": 9, "span": [0, 163], "__ref_s3_data": null}]}, {"text": "5.1 Hyper Parameter Optimization", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.28504943847656, 613.6966552734375, 318.44842529296875, 623.6006469726562], "page": 9, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.80441284179688, 537.6300659179688, 481.1519775390625, 607.1452026367188], "page": 9, "span": [0, 423], "__ref_s3_data": null}]}, {"text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. 
Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [133.88543701171875, 464.55596923828125, 480.59539794921875, 517.7815551757812], "page": 9, "span": [0, 398], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "5.2 Quantitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.48985290527344, 274.2215881347656, 264.4033203125, 284.3811950683594], "page": 9, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.97792053222656, 174.46827697753906, 480.59576416015625, 268.4878234863281], "page": 9, "span": [0, 555], "__ref_s3_data": null}]}, {"text": "Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. 
Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.90371704101562, 126.73831176757812, 480.6639099121094, 172.7313995361328], "page": 9, "span": [0, 289], "__ref_s3_data": null}]}, {"text": "10", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.6792755126953, 690.1593017578125, 144.2487335205078, 698.4376831054688], "page": 10, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [167.2496337890625, 690.1593017578125, 231.72048950195312, 699.0352783203125], "page": 10, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8).", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [134.00595092773438, 645.5076904296875, 480.59356689453125, 677.1614379882812], "page": 10, "span": [0, 192], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"text": "5.3 Qualitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.25314331054688, 493.7161560058594, 257.19561767578125, 503.76678466796875], "page": 10, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "To illustrate the qualitative differences between OTSL and HTML, Figure 5 demonstrates less overlap and more accurate bounding boxes with OTSL. 
In Figure 6, OTSL proves to be more effective in handling tables with longer token sequences, resulting in even more precise structure prediction and bounding boxes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.7931365966797, 425.5223083496094, 480.6096496582031, 483.0732421875], "page": 10, "span": [0, 309], "__ref_s3_data": null}]}, {"text": "Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). \"PMC2807444_006_00.png\" PubTabNet. \u03bc", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [133.934326171875, 352.2828369140625, 480.591064453125, 395.2126770019531], "page": 10, "span": [0, 270], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"text": "\u03bc", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [227.91465759277344, 116.65360260009766, 230.10028076171875, 126.1739730834961], "page": 10, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "\u2265", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [300.58056640625, 98.57134246826172, 302.72637939453125, 108.3780517578125], "page": 10, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [194.172119140625, 689.804443359375, 447.54290771484375, 698.850830078125], "page": 11, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "11", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [471.22021484375, 690.1593017578125, 480.5894775390625, 698.3983154296875], "page": 11, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. 
The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. \"PMC5406406_003_01.png\" PubTabNet.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [134.00157165527344, 613.6331176757812, 480.82830810546875, 667.0059814453125], "page": 11, "span": [0, 390], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"text": "12 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.69354248046875, 690.152099609375, 231.72048950195312, 698.9852905273438], "page": 12, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "6 Conclusion", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.32138061523438, 663.8826293945312, 219.25479125976562, 675.0826416015625], "page": 12, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "We demonstrated that representing tables in HTML for the task of table structure recognition with Im2Seq models is ill-suited and has serious limitations. Furthermore, we presented in this paper an Optimized Table Structure Language (OTSL) which, when compared to commonly used general purpose languages, has several key benefits.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [134.07997131347656, 588.5181884765625, 480.595703125, 645.8515014648438], "page": 12, "span": [0, 330], "__ref_s3_data": null}]}, {"text": "First and foremost, given the same network configuration, inference time for a table-structure prediction is about 2 times faster compared to the conventional HTML approach. This is primarily owed to the shorter sequence length of the OTSL representation. Additional performance benefits can be obtained with HPO (hyper parameter optimization). 
As we demonstrate in our experiments, models trained on OTSL can be significantly smaller, e.g. by reducing the number of encoder and decoder layers, while preserving comparatively good prediction quality. This can further improve inference performance, yielding 5-6 times faster inference speed in OTSL with prediction quality comparable to models trained on HTML (see Table 1).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.63015747070312, 467.4183654785156, 480.6451416015625, 585.736328125], "page": 12, "span": [0, 724], "__ref_s3_data": null}]}, {"text": "Secondly, OTSL has more inherent structure and a significantly restricted vocabulary size. This allows autoregressive models to perform better in the TED metric, but especially with regards to prediction accuracy of the table-cell bounding boxes (see Table 2). As shown in Figure 5, we observe that the OTSL drastically reduces the drift for table cell bounding boxes at high row count and in sparse tables. This leads to more accurate predictions and a significant reduction in post-processing complexity, which is an undesired necessity in HTML-based Im2Seq models. Significant novelty lies in OTSL syntactical rules, which are few, simple and always backwards looking. Each new token can be validated only by analyzing the sequence of previous tokens, without requiring the entire sequence to detect mistakes. This in return allows to perform structural error detection and correction on-the-fly during sequence generation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [133.8241424560547, 323.7073974609375, 480.5948181152344, 465.1226806640625], "page": 12, "span": [0, 926], "__ref_s3_data": null}]}, {"text": "References", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [134.31680297851562, 287.61077880859375, 197.68641662597656, 298.98321533203125], "page": 12, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "1. 
Auer, C., Dolfi, M., Carvalho, A., Ramis, C.B., Staar, P.W.J.: Delivering document conversion as a cloud service with high throughput and responsiveness. CoRR abs/2206.00785 (2022). https://doi.org/10.48550/arXiv.2206.00785 , https://doi.org/10.48550/arXiv.2206.00785", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.37100219726562, 227.38706970214844, 480.5920104980469, 269.8235168457031], "page": 12, "span": [0, 270], "__ref_s3_data": null}]}, {"text": "2. Chen, B., Peng, D., Zhang, J., Ren, Y., Jin, L.: Complex table structure recognition in the wild using transformer and identity matrix-based augmentation. In: Porwal, U., Forn\u00e9s, A., Shafait, F. (eds.) Frontiers in Handwriting Recognition. pp. 545561. Springer International Publishing, Cham (2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.86715698242188, 182.8286590576172, 480.6174011230469, 225.87879943847656], "page": 12, "span": [0, 301], "__ref_s3_data": null}]}, {"text": "3. Chi, Z., Huang, H., Xu, H.D., Yu, H., Yin, W., Mao, X.L.: Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.72738647460938, 160.16236877441406, 480.5873107910156, 181.41339111328125], "page": 12, "span": [0, 140], "__ref_s3_data": null}]}, {"text": "4. Deng, Y., Rosenberg, D., Mann, G.: Challenges in end-to-end neural scientific table recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 894-901. 
IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.9593963623047, 126.65552520751953, 480.5882568359375, 157.8516387939453], "page": 12, "span": [0, 204], "__ref_s3_data": null}]}, {"text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [194.0724639892578, 689.6328735351562, 447.54290771484375, 698.8519287109375], "page": 13, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "13", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [471.1661376953125, 690.1593017578125, 480.5894775390625, 698.4201049804688], "page": 13, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "5. Kayal, P., Anand, M., Desai, H., Singh, M.: Tables to latex: structure and content extraction from scientific tables. International Journal on Document Analysis and Recognition (IJDAR) pp. 1-10 (2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.6960906982422, 641.0914306640625, 480.59478759765625, 672.9320068359375], "page": 13, "span": [0, 203], "__ref_s3_data": null}]}, {"text": "6. Lee, E., Kwon, J., Yang, H., Park, J., Lee, S., Koo, H.I., Cho, N.I.: Table structure recognition based on grid shape graph. In: 2022 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). pp. 18681873. IEEE (2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.54495239257812, 598.4913940429688, 480.7531433105469, 640.2967529296875], "page": 13, "span": [0, 264], "__ref_s3_data": null}]}, {"text": "7. 
Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: A benchmark dataset for table detection and recognition (2019)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [139.07086181640625, 576.4161376953125, 480.5901184082031, 596.6123046875], "page": 13, "span": [0, 131], "__ref_s3_data": null}]}, {"text": "8. Livathinos, N., Berrospi, C., Lysak, M., Kuropiatnyk, V., Nassar, A., Carvalho, A., Dolfi, M., Auer, C., Dinkla, K., Staar, P.: Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence 35 (17), 15137-15145 (May 2021), https://ojs.aaai.org/index.php/ AAAI/article/view/17777", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.5443878173828, 521.7116088867188, 480.8269348144531, 574.5029296875], "page": 13, "span": [0, 345], "__ref_s3_data": null}]}, {"text": "9. Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: Table structure understanding with transformers. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4614-4623 (June 2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [138.21878051757812, 487.909423828125, 480.5938720703125, 519.8042602539062], "page": 13, "span": [0, 234], "__ref_s3_data": null}]}, {"text": "10. Pfitzmann, B., Auer, C., Dolfi, M., Nassar, A.S., Staar, P.W.J.: Doclaynet: A large human-annotated dataset for document-layout segmentation. In: Zhang, A., Rangwala, H. (eds.) KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 14 - 18, 2022. pp. 3743-3751. ACM (2022). 
https://doi.org/10.1145/3534678.3539043 , https:// doi.org/10.1145/3534678.3539043", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.7440185546875, 422.8146057128906, 480.6158447265625, 486.7056579589844], "page": 13, "span": [0, 413], "__ref_s3_data": null}]}, {"text": "11. Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from imagebased documents. In: Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops. pp. 572-573 (2020)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.48020935058594, 378.9383850097656, 480.59295654296875, 421.14239501953125], "page": 13, "span": [0, 295], "__ref_s3_data": null}]}, {"text": "12. Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162-1167. IEEE (2017)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.6136016845703, 334.68109130859375, 480.6297302246094, 377.08355712890625], "page": 13, "span": [0, 281], "__ref_s3_data": null}]}, {"text": "13. Siddiqui, S.A., Fateh, I.A., Rizvi, S.T.R., Dengel, A., Ahmed, S.: Deeptabstr: Deep learning based table structure recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1403-1409 (2019). https:// doi.org/10.1109/ICDAR.2019.00226", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.72238159179688, 290.7889099121094, 480.75555419921875, 333.61895751953125], "page": 13, "span": [0, 275], "__ref_s3_data": null}]}, {"text": "14. Smock, B., Pesala, R., Abraham, R.: PubTables-1M: Towards comprehensive table extraction from unstructured documents. 
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4634-4642 (June 2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.3740997314453, 247.3230743408203, 480.5928649902344, 289.9039306640625], "page": 13, "span": [0, 241], "__ref_s3_data": null}]}, {"text": "15. Staar, P.W.J., Dolfi, M., Auer, C., Bekas, C.: Corpus conversion service: A machine learning platform to ingest documents at scale. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. pp. 774-782. KDD '18, Association for Computing Machinery, New York, NY, USA (2018). https://doi.org/10.1145/3219819.3219834 , https://doi.org/10. 1145/3219819.3219834", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.6051483154297, 181.90472412109375, 480.6208190917969, 245.70274353027344], "page": 13, "span": [0, 405], "__ref_s3_data": null}]}, {"text": "16. Wang, X.: Tabular Abstraction, Editing, and Formatting. Ph.D. thesis, CAN (1996), aAINN09397", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.76400756835938, 159.9412841796875, 480.5954284667969, 179.845703125], "page": 13, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "17. Xue, W., Li, Q., Tao, D.: Res2tim: Reconstruct syntactic structures from table images. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 749-755. IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.76400756835938, 126.6559829711914, 480.5911865234375, 157.7118377685547], "page": 13, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "14 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [134.76499938964844, 690.1593017578125, 231.72048950195312, 699.0250244140625], "page": 14, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "18. 
Xue, W., Yu, B., Wang, W., Tao, D., Li, Q.: Tgrnet: A table graph reconstruction network for table structure recognition. In: Proceedings of the IEEE/CVF International Conference on Computer Vision. pp. 1295-1304 (2021)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.63540649414062, 641.2738647460938, 480.59112548828125, 673.007568359375], "page": 14, "span": [0, 223], "__ref_s3_data": null}]}, {"text": "19. Ye, J., Qi, X., He, Y., Chen, Y., Gu, D., Gao, P., Xiao, R.: Pingan-vcgroup's solution for icdar 2021 competition on scientific literature parsing task b: Table recognition to html (2021). https://doi.org/10.48550/ARXIV.2105.01848 , https://arxiv.org/abs/2105.01848", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.76499938964844, 598.3690795898438, 480.9535217285156, 640.1014404296875], "page": 14, "span": [0, 269], "__ref_s3_data": null}]}, {"text": "20. Zhang, Z., Zhang, J., Du, J., Wang, F.: Split, embed and merge: An accurate table structure recognizer. Pattern Recognition 126 , 108565 (2022)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.35293579101562, 576.3993530273438, 480.5935363769531, 596.5462036132812], "page": 14, "span": [0, 147], "__ref_s3_data": null}]}, {"text": "21. Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV). pp. 697-706 (2021). https://doi.org/10.1109/WACV48630.2021. 00074", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.2264862060547, 521.74560546875, 480.8044738769531, 574.3355712890625], "page": 14, "span": [0, 329], "__ref_s3_data": null}]}, {"text": "22. Zhong, X., ShafieiBavani, E., Jimeno Yepes, A.: Image-based table recognition: Data, model, and evaluation. 
In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) Computer Vision - ECCV 2020. pp. 564-580. Springer International Publishing, Cham (2020)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [133.99171447753906, 477.6664123535156, 480.5955810546875, 519.9246826171875], "page": 14, "span": [0, 259], "__ref_s3_data": null}]}, {"text": "23. Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [134.23336791992188, 444.7017822265625, 480.59454345703125, 475.69757080078125], "page": 14, "span": [0, 206], "__ref_s3_data": null}]}], "figures": [{"bounding-box": null, "prov": [{"bbox": [150.0213623046875, 366.15130615234375, 464.4815673828125, 583.114990234375], "page": 2, "span": [0, 574], "__ref_s3_data": null}], "text": "Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL).", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [137.5374755859375, 452.4152526855469, 476.1513366699219, 562.9699096679688], "page": 5, "span": [0, 73], "__ref_s3_data": null}], "text": "Fig. 2. 
Frequency of tokens in HTML and OTSL as they appear in PubTabNet.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [164.22023010253906, 511.6170959472656, 448.9761047363281, 628.123291015625], "page": 7, "span": [0, 207], "__ref_s3_data": null}], "text": "Fig. 3. OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.4298095703125, 197.92733764648438, 472.34527587890625, 285.1344299316406], "page": 8, "span": [0, 104], "__ref_s3_data": null}], "text": "Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach.", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [162.900146484375, 128.48397827148438, 451.3374328613281, 348.21990966796875], "page": 10, "span": [0, 270], "__ref_s3_data": null}], "text": "Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). \"PMC2807444_006_00.png\" PubTabNet. \u03bc", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [168.26930236816406, 157.55677795410156, 447.7568664550781, 609.8697509765625], "page": 11, "span": [0, 390], "__ref_s3_data": null}], "text": "Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. 
\"PMC5406406_003_01.png\" PubTabNet.", "type": "figure"}], "tables": [{"bounding-box": null, "prov": [{"bbox": [139.82040405273438, 322.2669982910156, 474.80023193359375, 454.9158935546875], "page": 9, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "table", "#-cols": 8, "#-rows": 7, "data": [[{"bbox": [160.3699951171875, 442.1952819824219, 168.0479278564453, 450.2650451660156], "spans": [[0, 0]], "text": "#", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [207.9739990234375, 442.1952819824219, 215.6519317626953, 450.2650451660156], "spans": [[0, 1]], "text": "#", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [239.79800415039062, 436.7162780761719, 278.3176574707031, 444.7860412597656], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": 
[324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 5, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [396.27099609375, 442.1952819824219, 417.1268310546875, 450.2650451660156], "spans": [[0, 6]], "text": "mAP", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [430.77099609375, 442.1952819824219, 467.1423034667969, 450.2650451660156], "spans": [[0, 7]], "text": "Inference", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [144.5919952392578, 429.2442932128906, 183.82806396484375, 437.3140563964844], "spans": [[1, 0]], "text": "enc-layers", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.1949920654297, 429.2442932128906, 231.43106079101562, 437.3140563964844], "spans": [[1, 1]], "text": "dec-layers", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [239.79800415039062, 436.7162780761719, 278.3176574707031, 444.7860412597656], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [286.6860046386719, 429.2442932128906, 312.3326110839844, 437.3140563964844], "spans": [[1, 3]], "text": "simple", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [320.7019958496094, 429.2442932128906, 353.7198791503906, 437.3140563964844], "spans": [[1, 4]], "text": "complex", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, 
"row-span": [1, 2]}, {"bbox": [369.3059997558594, 429.2442932128906, 379.03094482421875, 437.3140563964844], "spans": [[1, 5]], "text": "all", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [394.927001953125, 431.2362976074219, 418.4727783203125, 439.3060607910156], "spans": [[1, 6]], "text": "(0.75)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.14801025390625, 431.2362976074219, 470.76055908203125, 439.3060607910156], "spans": [[1, 7]], "text": "time (secs)", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [161.906005859375, 410.4142761230469, 166.512939453125, 418.4840393066406], "spans": [[2, 0]], "text": "6", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [209.50900268554688, 410.4142761230469, 214.11593627929688, 418.4840393066406], "spans": [[2, 1]], "text": "6", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [245.17599487304688, 402.9422912597656, 272.9395446777344, 423.96405029296875], "spans": [[2, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.0169982910156, 402.9422912597656, 310.0037536621094, 423.96405029296875], "spans": [[2, 3]], "text": "0.965 0.969", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [326.7170104980469, 402.9422912597656, 347.7037658691406, 423.96405029296875], "spans": [[2, 4]], "text": "0.934 0.927", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, 
{"bbox": [363.6759948730469, 402.9422912597656, 384.6627502441406, 423.96405029296875], "spans": [[2, 5]], "text": "0.955 0.955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.20599365234375, 402.9422912597656, 417.1927490234375, 424.0268249511719], "spans": [[2, 6]], "text": "0.88 0.857", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [439.5270080566406, 402.9422912597656, 458.3842468261719, 424.0268249511719], "spans": [[2, 7]], "text": "2.73 5.39", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.906005859375, 384.11328125, 166.512939453125, 392.18304443359375], "spans": [[3, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [209.50900268554688, 384.11328125, 214.11593627929688, 392.18304443359375], "spans": [[3, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [245.17599487304688, 376.64129638671875, 272.9395446777344, 397.66204833984375], "spans": [[3, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.0169982910156, 376.64129638671875, 310.0037536621094, 397.66204833984375], "spans": [[3, 3]], "text": "0.938 0.952", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [326.7170104980469, 376.64129638671875, 347.7037658691406, 397.66204833984375], "spans": [[3, 4]], "text": "0.904 0.909", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [363.6759948730469, 
389.59228515625, 384.6627502441406, 397.66204833984375], "spans": [[3, 5]], "text": "0.927", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.6180114746094, 389.79852294921875, 418.77886962890625, 397.7248229980469], "spans": [[3, 6]], "text": "0.853", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [439.5270080566406, 389.79852294921875, 458.3842468261719, 397.7248229980469], "spans": [[3, 7]], "text": "1.97", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [161.906005859375, 357.8122863769531, 166.512939453125, 365.8820495605469], "spans": [[4, 0]], "text": "2", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [209.50900268554688, 357.8122863769531, 214.11593627929688, 365.8820495605469], "spans": [[4, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [245.17599487304688, 350.3403015136719, 272.9395446777344, 371.3610534667969], "spans": [[4, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.0169982910156, 363.2912902832031, 310.0037536621094, 371.3610534667969], "spans": [[4, 3]], "text": "0.923", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [326.7170104980469, 350.3403015136719, 347.7037658691406, 371.3610534667969], "spans": [[4, 4]], "text": "0.897 0.901", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [362.0880126953125, 363.2912902832031, 386.2488708496094, 384.7738342285156], 
"spans": [[4, 5]], "text": "0.938 0.915", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [396.20599365234375, 376.64129638671875, 417.1927490234375, 384.7110595703125], "spans": [[4, 6]], "text": "0.843", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [440.7669982910156, 376.64129638671875, 457.1468200683594, 384.7110595703125], "spans": [[4, 7]], "text": "3.77", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": null, "spans": [[5, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [289.0169982910156, 350.3403015136719, 310.0037536621094, 358.4100646972656], "spans": [[5, 3]], "text": "0.945", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [362.0880126953125, 350.5465393066406, 386.2488708496094, 358.47283935546875], "spans": [[5, 5]], "text": "0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.6180114746094, 350.3403015136719, 418.77886962890625, 371.423828125], "spans": [[5, 6]], "text": "0.859 0.834", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, 
"row-header": false, "row-span": [5, 6]}, {"bbox": [439.5270080566406, 350.3403015136719, 458.3842468261719, 371.423828125], "spans": [[5, 7]], "text": "1.91 3.81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.906005859375, 331.5102844238281, 166.512939453125, 339.5800476074219], "spans": [[6, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [209.50900268554688, 331.5102844238281, 214.11593627929688, 339.5800476074219], "spans": [[6, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [245.17599487304688, 324.0382995605469, 272.9395446777344, 345.06005859375], "spans": [[6, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [289.0169982910156, 324.0382995605469, 310.0037536621094, 345.06005859375], "spans": [[6, 3]], "text": "0.952 0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [326.7170104980469, 324.0382995605469, 347.7037658691406, 345.06005859375], "spans": [[6, 4]], "text": "0.92 0.903", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [362.0880126953125, 324.0382995605469, 386.2488708496094, 345.1228332519531], "spans": [[6, 5]], "text": "0.942 0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [394.6180114746094, 324.0382995605469, 418.77886962890625, 345.1228332519531], "spans": [[6, 6]], "text": "0.857 0.824", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}, 
{"bbox": [439.5270080566406, 324.0382995605469, 458.3842468261719, 345.1228332519531], "spans": [[6, 7]], "text": "1.22 2", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [143.81715393066406, 528.7755126953125, 470.8412170410156, 635.86865234375], "page": 10, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8).", "type": "table", "#-cols": 7, "#-rows": 8, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [215.52499389648438, 617.3963012695312, 254.04464721679688, 625.4660034179688], "spans": [[0, 1], [1, 1]], "text": "Language", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 4, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [370.3450012207031, 617.371337890625, 414.7466125488281, 625.4410400390625], "spans": 
[[0, 5], [1, 5]], "text": "mAP(0.75)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [423.114013671875, 611.892333984375, 466.7265625, 630.9210205078125], "spans": [[0, 6], [1, 6]], "text": "Inference time (secs)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, "row-span": [0, 2]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [215.52499389648438, 617.3963012695312, 254.04464721679688, 625.4660034179688], "spans": [[0, 1], [1, 1]], "text": "Language", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [262.4129943847656, 609.8992919921875, 288.0596008300781, 617.968994140625], "spans": [[1, 2]], "text": "simple", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [296.4289855957031, 609.8992919921875, 329.4468688964844, 617.968994140625], "spans": [[1, 3]], "text": "complex", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [345.0329895019531, 609.8992919921875, 354.7579345703125, 617.968994140625], "spans": [[1, 4]], "text": "all", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [370.3450012207031, 617.371337890625, 414.7466125488281, 625.4410400390625], "spans": [[0, 5], [1, 5]], "text": "mAP(0.75)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [423.114013671875, 611.892333984375, 466.7265625, 630.9210205078125], "spans": [[0, 6], [1, 6]], "text": "Inference time (secs)", 
"type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [0, 2]}], [{"bbox": [154.53799438476562, 591.0703125, 201.2412872314453, 599.1400146484375], "spans": [[2, 0], [3, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 4]}, {"bbox": [222.43699645996094, 596.54931640625, 247.13226318359375, 604.6190185546875], "spans": [[2, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [264.7439880371094, 596.54931640625, 285.7307434082031, 604.6190185546875], "spans": [[2, 2]], "text": "0.965", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [302.4440002441406, 596.54931640625, 323.4307556152344, 604.6190185546875], "spans": [[2, 3]], "text": "0.934", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [339.40301513671875, 596.54931640625, 360.3897705078125, 604.6190185546875], "spans": [[2, 4]], "text": "0.955", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [383.1159973144531, 596.7554931640625, 401.9732360839844, 604.6818237304688], "spans": [[2, 5]], "text": "0.88", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [435.4930114746094, 596.7554931640625, 454.3502502441406, 604.6818237304688], "spans": [[2, 6]], "text": "2.73", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [154.53799438476562, 591.0703125, 201.2412872314453, 599.1400146484375], "spans": [[2, 0], [3, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, 
"col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [2, 4]}, {"bbox": [220.9029998779297, 583.5983276367188, 248.66656494140625, 591.6680297851562], "spans": [[3, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [264.7439880371094, 583.5983276367188, 285.7307434082031, 591.6680297851562], "spans": [[3, 2]], "text": "0.969", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [302.4440002441406, 583.5983276367188, 323.4307556152344, 591.6680297851562], "spans": [[3, 3]], "text": "0.927", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [339.40301513671875, 583.5983276367188, 360.3897705078125, 591.6680297851562], "spans": [[3, 4]], "text": "0.955", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [382.052001953125, 583.5983276367188, 403.03875732421875, 591.6680297851562], "spans": [[3, 5]], "text": "0.857", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [436.73199462890625, 583.5983276367188, 453.11181640625, 591.6680297851562], "spans": [[3, 6]], "text": "5.39", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [155.94500732421875, 564.768310546875, 199.833740234375, 572.8380126953125], "spans": [[4, 0], [5, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 6]}, {"bbox": [222.43699645996094, 570.248291015625, 247.13226318359375, 578.3179931640625], "spans": [[4, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": 
[1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [264.7439880371094, 570.248291015625, 285.7307434082031, 578.3179931640625], "spans": [[4, 2]], "text": "0.955", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [302.4440002441406, 570.248291015625, 323.4307556152344, 578.3179931640625], "spans": [[4, 3]], "text": "0.961", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [337.81500244140625, 570.4544677734375, 361.9758605957031, 578.3807983398438], "spans": [[4, 4]], "text": "0.959", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [380.4639892578125, 570.4544677734375, 404.6248474121094, 578.3807983398438], "spans": [[4, 5]], "text": "0.862", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [435.4930114746094, 570.4544677734375, 454.3502502441406, 578.3807983398438], "spans": [[4, 6]], "text": "1.85", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [155.94500732421875, 564.768310546875, 199.833740234375, 572.8380126953125], "spans": [[4, 0], [5, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [4, 6]}, {"bbox": [220.9029998779297, 557.2963256835938, 248.66656494140625, 565.3660278320312], "spans": [[5, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [264.7439880371094, 557.2963256835938, 285.7307434082031, 565.3660278320312], "spans": [[5, 2]], "text": "0.917", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, 
"row-span": [5, 6]}, {"bbox": [302.4440002441406, 557.2963256835938, 323.4307556152344, 565.3660278320312], "spans": [[5, 3]], "text": "0.922", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [341.70599365234375, 557.2963256835938, 358.0858154296875, 565.3660278320312], "spans": [[5, 4]], "text": "0.92", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [382.052001953125, 557.2963256835938, 403.03875732421875, 565.3660278320312], "spans": [[5, 5]], "text": "0.722", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [436.73199462890625, 557.2963256835938, 453.11181640625, 565.3660278320312], "spans": [[5, 6]], "text": "3.26", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [148.62600708007812, 538.4673461914062, 207.15240478515625, 546.5370483398438], "spans": [[6, 0], [7, 0]], "text": "PubTables-1M", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 8]}, {"bbox": [222.43699645996094, 543.9473266601562, 247.13226318359375, 552.0170288085938], "spans": [[6, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [264.7439880371094, 543.9473266601562, 285.7307434082031, 552.0170288085938], "spans": [[6, 2]], "text": "0.987", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [302.4440002441406, 543.9473266601562, 323.4307556152344, 552.0170288085938], "spans": [[6, 3]], "text": "0.964", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": 
[337.81500244140625, 544.1535034179688, 361.9758605957031, 552.079833984375], "spans": [[6, 4]], "text": "0.977", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [380.4639892578125, 544.1535034179688, 404.6248474121094, 552.079833984375], "spans": [[6, 5]], "text": "0.896", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [435.4930114746094, 544.1535034179688, 454.3502502441406, 552.079833984375], "spans": [[6, 6]], "text": "1.79", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [148.62600708007812, 538.4673461914062, 207.15240478515625, 546.5370483398438], "spans": [[6, 0], [7, 0]], "text": "PubTables-1M", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [6, 8]}, {"bbox": [220.9029998779297, 530.9953002929688, 248.66656494140625, 539.0650024414062], "spans": [[7, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [264.7439880371094, 530.9953002929688, 285.7307434082031, 539.0650024414062], "spans": [[7, 2]], "text": "0.983", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [302.4440002441406, 530.9953002929688, 323.4307556152344, 539.0650024414062], "spans": [[7, 3]], "text": "0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [339.40301513671875, 530.9953002929688, 360.3897705078125, 539.0650024414062], "spans": [[7, 4]], "text": "0.966", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [382.052001953125, 
530.9953002929688, 403.03875732421875, 539.0650024414062], "spans": [[7, 5]], "text": "0.889", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [436.73199462890625, 530.9953002929688, 453.11181640625, 539.0650024414062], "spans": [[7, 6]], "text": "3.26", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 7, "row-header": false, "row-span": [7, 8]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "2305.03393v1.pdf", "filename-prov": null, "document-hash": "62f2a2163d768d5b125a207967797aefa6c9cc113de8bb5c725c582595dd0c1d", "#-pages": 14, "collection-name": null, "description": null, "page-hashes": 
[{"hash": "7d7ef24bf2a048bcc229d37583b737ee85f67a02864236764abcaca9eabc8b68", "model": "default", "page": 1}, {"hash": "45bd6ad4d3e145029fa89fbf741a81d8885eb87ef03d6744221c61e66358451b", "model": "default", "page": 2}, {"hash": "69656f07bd8fb7afc53ab6f3d0e9153a337b550522493bf18d702c8406a9c545", "model": "default", "page": 3}, {"hash": "5afca9340c5bda646a75b8c2a1bde1b8f7b89e08a64a3cc4732fd11c1c6ead48", "model": "default", "page": 4}, {"hash": "d3b9daa8fd5d091fb5ef9bce44f085dd282a137e215574fec9556904b25cea8a", "model": "default", "page": 5}, {"hash": "eaaaaebf96b567c9bd5696b2dd4d747b3b3ad40e15ca8dc8968c56060315f228", "model": "default", "page": 6}, {"hash": "d786b8d564d7a7c122f2cf573f0cc1f11ea0a559d93f19cf020c11360bce00b4", "model": "default", "page": 7}, {"hash": "839d5ba3f9d079e8b42470002e4d7cb9ac60681cd9e2f2e3bf41afa6884a170e", "model": "default", "page": 8}, {"hash": "d50e5f3b8b4d1d5b04d5b253b187da6f40784bee5bf36b7eaefcabbc89e7b7a9", "model": "default", "page": 9}, {"hash": "a1509c4093fe25dbcb07c87f394506182323289a17dd189679c0b6d8238c5aae", "model": "default", "page": 10}, {"hash": "ac5ff01e648170bbe641d6fd95dc4f952a8e0bf62308f109b7c49678cef97005", "model": "default", "page": 11}, {"hash": "6a9aa589dc4faead43b032ec733af0c4a6fedfa834aa56b1bfefc7458ea949cc", "model": "default", "page": 12}, {"hash": "467ed0563b555b6fd2a0bd2e4a7bf596c066b8f08d2e1fd33f6c6d8b1c445759", "model": "default", "page": 13}, {"hash": "435efd2ece1dfed60a8dcc1f7fd72dde2cb58c59f5aebc4d5ae2227510195b42", "model": "default", "page": 14}]}, "main-text": [{"prov": [{"bbox": [16.329214096069336, 236.99996948242188, 36.6031608581543, 582.52001953125], "page": 1, "span": [0, 37], "__ref_s3_data": null}], "text": "arXiv:2305.03393v1 [cs.CV] 5 May 2023", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.61328125, 644.6187133789062, 480.59735107421875, 676.8052978515625], "page": 1, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for 
Table Structure Recognition", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [138.6561737060547, 587.6192626953125, 476.05718994140625, 623.0816650390625], "page": 1, "span": [0, 238], "__ref_s3_data": null}], "text": "Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, and Peter Staar [0000 - 0002 - 8088 - 0823]", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [222.96609497070312, 555.623046875, 392.69110107421875, 575.94482421875], "page": 1, "span": [0, 49], "__ref_s3_data": null}], "text": "IBM Research {mly,ahn,nli,cau,taa}@zurich.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [162.13674926757812, 327.2655334472656, 452.4198913574219, 522.533447265625], "page": 1, "span": [0, 1198], "__ref_s3_data": null}], "text": "Abstract. Extracting tables from documents is a crucial task in any document conversion pipeline. Recently, transformer-based models have demonstrated that table-structure can be recognized with impressive accuracy using Image-to-Markup-Sequence (Im2Seq) approaches. Taking only the image of a table, such models predict a sequence of tokens (e.g. in HTML, LaTeX) which represent the structure of the table. Since the token representation of the table structure has a significant impact on the accuracy and run-time performance of any Im2Seq model, we investigate in this paper how table-structure representation can be optimised. We propose a new, optimised table-structure language (OTSL) with a minimized vocabulary and specific rules. The benefits of OTSL are that it reduces the number of tokens to 5 (HTML needs 28+) and shortens the sequence length to half of HTML on average. 
Consequently, model accuracy improves significantly, inference time is halved compared to HTML-based models, and the predicted table structures are always syntactically correct. This in turn eliminates most post-processing needs. Popular table structure data-sets will be published in OTSL format to the community.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [162.6794891357422, 293.8035888671875, 452.2415771484375, 314.24090576171875], "page": 1, "span": [0, 90], "__ref_s3_data": null}], "text": "Keywords: Table Structure Recognition \u00b7 Data Representation \u00b7 Transformers \u00b7 Optimization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.76512145996094, 259.3119201660156, 228.933837890625, 270.5150451660156], "page": 1, "span": [0, 14], "__ref_s3_data": null}], "text": "1 Introduction", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [134.01023864746094, 163.12771606445312, 480.595947265625, 244.2879638671875], "page": 1, "span": [0, 500], "__ref_s3_data": null}], "text": "Tables are ubiquitous in documents such as scientific papers, patents, reports, manuals, specification sheets or marketing material. They often encode highly valuable information and therefore need to be extracted with high accuracy. Unfortunately, tables appear in documents in various sizes, styling and structure, making it difficult to recover their correct structure with simple analytical methods. Therefore, accurate table extraction is achieved these days with machine-learning based methods.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.044189453125, 126.84117889404297, 480.74835205078125, 160.30677795410156], "page": 1, "span": [0, 235], "__ref_s3_data": null}], "text": "In modern document understanding systems [1,15], table extraction is typically a two-step process. 
Firstly, every table on a page is located with a bounding box, and secondly, their logical row and column structure is recognized. As of", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.28973388671875, 690.1593017578125, 139.494384765625, 698.4556884765625], "page": 2, "span": [0, 1], "__ref_s3_data": null}], "text": "2", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [167.312744140625, 689.8800048828125, 231.72227478027344, 699.0272827148438], "page": 2, "span": [0, 16], "__ref_s3_data": null}], "text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.99227905273438, 591.5379028320312, 480.7561950683594, 666.4251098632812], "page": 2, "span": [0, 574], "__ref_s3_data": null}], "text": "Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL).", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"prov": [{"bbox": [133.9597930908203, 270.46295166015625, 480.5923156738281, 340.515380859375], "page": 2, "span": [0, 435], "__ref_s3_data": null}], "text": "today, table detection in documents is a well understood problem, and the latest state-of-the-art (SOTA) object detection methods provide an accuracy comparable to human observers [7,8,10,14,23]. 
On the other hand, the problem of table structure recognition (TSR) is a lot more challenging and remains a very active area of research, in which many novel machine learning algorithms are being explored [3,4,5,9,11,12,13,14,17,18,21,22].", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.86209106445312, 126.80567932128906, 480.5948181152344, 268.64990234375], "page": 2, "span": [0, 911], "__ref_s3_data": null}], "text": "Recently emerging SOTA methods for table structure recognition employ transformer-based models, in which an image of the table is provided to the network in order to predict the structure of the table as a sequence of tokens. These image-to-sequence (Im2Seq) models are extremely powerful, since they allow for a purely data-driven solution. The tokens of the sequence typically belong to a markup language such as HTML, Latex or Markdown, which allow to describe table structure as rows, columns and spanning cells in various configurations. In Figure 1, we illustrate how HTML is used to represent the table-structure of a particular example table. Public table-structure data sets such as PubTabNet [22], and FinTabNet [21], which were created in a semi-automated way from paired PDF and HTML sources (e.g. 
PubMed Central), popularized primarily the use of HTML as ground-truth representation format for TSR.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [194.0343780517578, 689.6653442382812, 447.54290771484375, 698.948486328125], "page": 3, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [474.95513916015625, 690.1593017578125, 480.59124755859375, 698.3677978515625], "page": 3, "span": [0, 1], "__ref_s3_data": null}], "text": "3", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.981201171875, 579.9556884765625, 480.7418212890625, 673.815185546875], "page": 3, "span": [0, 584], "__ref_s3_data": null}], "text": "While the majority of research in TSR is currently focused on the development and application of novel neural model architectures, the table structure representation language (e.g. HTML in PubTabNet and FinTabNet) is usually adopted as is for the sequence tokenization in Im2Seq models. In this paper, we aim for the opposite and investigate the impact of the table structure representation language with an otherwise unmodified Im2Seq transformer-based architecture. Since the current state-of-the-art Im2Seq model is TableFormer [9], we select this model to perform our experiments.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.7724151611328, 460.7701416015625, 480.87481689453125, 577.6600341796875], "page": 3, "span": [0, 721], "__ref_s3_data": null}], "text": "The main contribution of this paper is the introduction of a new optimised table structure language (OTSL), specifically designed to describe table-structure in an compact and structured way for Im2Seq models. OTSL has a number of key features, which make it very attractive to use in Im2Seq models. 
Specifically, compared to other languages such as HTML, OTSL has a minimized vocabulary which yields short sequence length, strong inherent structure (e.g. strict rectangular layout) and a strict syntax with rules that only look backwards. The latter allows for syntax validation during inference and ensures a syntactically correct table-structure. These OTSL features are illustrated in Figure 1, in comparison to HTML.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.7509765625, 352.1451110839844, 480.6080017089844, 458.64886474609375], "page": 3, "span": [0, 626], "__ref_s3_data": null}], "text": "The paper is structured as follows. In section 2, we give an overview of the latest developments in table-structure reconstruction. In section 3 we review the current HTML table encoding (popularised by PubTabNet and FinTabNet) and discuss its flaws. Subsequently, we introduce OTSL in section 4, which includes the language definition, syntax rules and error-correction procedures. In section 5, we apply OTSL on the TableFormer architecture, compare it to TableFormer models trained on HTML and ultimately demonstrate the advantages of using OTSL. Finally, in section 6 we conclude our work and outline next potential steps.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.4993896484375, 319.3436584472656, 236.76913452148438, 330.5750732421875], "page": 3, "span": [0, 14], "__ref_s3_data": null}], "text": "2 Related Work", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.65347290039062, 126.65711212158203, 484.1204833984375, 304.6298522949219], "page": 3, "span": [0, 1161], "__ref_s3_data": null}], "text": "Approaches to formalize the logical structure and layout of tables in electronic documents date back more than two decades [16]. In the recent past, a wide variety of computer vision methods have been explored to tackle the problem of table structure recognition, i.e. 
the correct identification of columns, rows and spanning cells in a given table. Broadly speaking, the current deeplearning based approaches fall into three categories: object detection (OD) methods, Graph-Neural-Network (GNN) methods and Image-to-Markup-Sequence (Im2Seq) methods. Object-detection based methods [11,12,13,14,21] rely on tablestructure annotation using (overlapping) bounding boxes for training, and produce bounding-box predictions to define table cells, rows, and columns on a table image. Graph Neural Network (GNN) based methods [3,6,17,18], as the name suggests, represent tables as graph structures. The graph nodes represent the content of each table cell, an embedding vector from the table image, or geometric coordinates of the table cell. The edges of the graph define the relationship between the nodes, e.g. if they belong to the same column, row, or table cell.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.52096557617188, 690.1593017578125, 231.72227478027344, 699.0346069335938], "page": 4, "span": [0, 18], "__ref_s3_data": null}], "text": "4 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.7613983154297, 532.5480346679688, 480.6270446777344, 674.1491088867188], "page": 4, "span": [0, 939], "__ref_s3_data": null}], "text": "Other work [20] aims at predicting a grid for each table and deciding which cells must be merged using an attention network. Im2Seq methods cast the problem as a sequence generation task [4,5,9,22], and therefore need an internal tablestructure representation language, which is often implemented with standard markup languages (e.g. HTML, LaTeX, Markdown). In theory, Im2Seq methods have a natural advantage over the OD and GNN methods by virtue of directly predicting the table-structure. As such, no post-processing or rules are needed in order to obtain the table-structure, which is necessary with OD and GNN approaches. 
In practice, this is not entirely true, because a predicted sequence of table-structure markup does not necessarily have to be syntactically correct. Hence, depending on the quality of the predicted sequence, some post-processing needs to be performed to ensure a syntactically valid (let alone correct) sequence.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.5825958251953, 305.3533020019531, 480.7930908203125, 530.6050415039062], "page": 4, "span": [0, 1404], "__ref_s3_data": null}], "text": "Within the Im2Seq method, we find several popular models, namely the encoder-dual-decoder model (EDD) [22], TableFormer [9], Tabsplitter[2] and Ye et. al. [19]. EDD uses two consecutive long short-term memory (LSTM) decoders to predict a table in HTML representation. The tag decoder predicts a sequence of HTML tags. For each decoded table cell ( ), the attention is passed to the cell decoder to predict the content with an embedded OCR approach. The latter makes it susceptible to transcription errors in the cell content of the table. TableFormer address this reliance on OCR and uses two transformer decoders for HTML structure and cell bounding box prediction in an end-to-end architecture. The predicted cell bounding box is then used to extract text tokens from an originating (digital) PDF page, circumventing any need for OCR. TabSplitter [2] proposes a compact double-matrix representation of table rows and columns to do error detection and error correction of HTML structure sequences based on predictions from [19]. This compact double-matrix representation can not be used directly by the Img2seq model training, so the model uses HTML as an intermediate form. Chi et. al. [4] introduce a data set and a baseline method using bidirectional LSTMs to predict LaTeX code. 
Kayal [5] introduces Gated ResNet transformers to predict LaTeX code, and a separate OCR module to extract content.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.88829040527344, 209.4513397216797, 480.5937805175781, 303.2884216308594], "page": 4, "span": [0, 572], "__ref_s3_data": null}], "text": "Im2Seq approaches have shown to be well-suited for the TSR task and allow a full end-to-end network design that can output the final table structure without pre- or post-processing logic. Furthermore, Im2Seq models have demonstrated to deliver state-of-the-art prediction accuracy [9]. This motivated the authors to investigate if the performance (both in accuracy and inference time) can be further improved by optimising the table structure representation language. We believe this is a necessary step before further improving neural network architectures for this task.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.42018127441406, 175.88177490234375, 269.6244201660156, 186.8051300048828], "page": 4, "span": [0, 19], "__ref_s3_data": null}], "text": "3 Problem Statement", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.80313110351562, 126.69752502441406, 480.59368896484375, 160.46705627441406], "page": 4, "span": [0, 233], "__ref_s3_data": null}], "text": "All known Im2Seq based models for TSR fundamentally work in similar ways. Given an image of a table, the Im2Seq model predicts the structure of the table by generating a sequence of tokens. 
These tokens originate from a finite vocab-", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [194.02210998535156, 689.8338623046875, 447.54290771484375, 698.9061889648438], "page": 5, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [475.1318664550781, 690.1593017578125, 480.59124755859375, 698.4717407226562], "page": 5, "span": [0, 1], "__ref_s3_data": null}], "text": "5", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.90025329589844, 604.4931640625, 480.7872619628906, 673.93798828125], "page": 5, "span": [0, 422], "__ref_s3_data": null}], "text": "ulary and can be interpreted as a table structure. For example, with the HTML tokens ,
, , , and , one can construct simple table structures without any spanning cells. In reality though, one needs at least 28 HTML tokens to describe the most common complex tables observed in real-world documents [21,22], due to a variety of spanning cells definitions in the HTML token vocabulary.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [145.19676208496094, 562.5794677734375, 469.7522277832031, 571.8128051757812], "page": 5, "span": [0, 73], "__ref_s3_data": null}], "text": "Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"prov": [{"bbox": [133.7060546875, 259.57940673828125, 480.62744140625, 424.87249755859375], "page": 5, "span": [0, 1021], "__ref_s3_data": null}], "text": "Obviously, HTML and other general-purpose markup languages were not designed for Im2Seq models. As such, they have some serious drawbacks. First, the token vocabulary needs to be artificially large in order to describe all plausible tabular structures. Since most Im2Seq models use an autoregressive approach, they generate the sequence token by token. Therefore, to reduce inference time, a shorter sequence length is critical. Every table-cell is represented by at least two tokens ( and ). Furthermore, when tokenizing the HTML structure, one needs to explicitly enumerate possible column-spans and row-spans as words. In practice, this ends up requiring 28 different HTML tokens (when including column- and row-spans up to 10 cells) just to describe every table in the PubTabNet dataset. Clearly, not every token is equally represented, as is depicted in Figure 2. 
This skewed distribution of tokens in combination with variable token row-length makes it challenging for models to learn the HTML structure.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.89939880371094, 210.46835327148438, 480.5928955078125, 257.10150146484375], "page": 5, "span": [0, 313], "__ref_s3_data": null}], "text": "Additionally, it would be desirable if the representation would easily allow an early detection of invalid sequences on-the-go, before the prediction of the entire table structure is completed. HTML is not well-suited for this purpose as the verification of incomplete sequences is non-trivial or even impossible.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.75929260253906, 126.89654541015625, 480.5947265625, 208.89126586914062], "page": 5, "span": [0, 542], "__ref_s3_data": null}], "text": "In a valid HTML table, the token sequence must describe a 2D grid of table cells, serialised in row-major ordering, where each row and each column have the same length (while considering row- and column-spans). Furthermore, every opening tag in HTML needs to be matched by a closing tag in a correct hierarchical manner. Since the number of tokens for each table row and column can vary significantly, especially for large tables with many row- and column-spans, it is complex to verify the consistency of predicted structures during sequence", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.12826538085938, 690.1593017578125, 139.453125, 698.234130859375], "page": 6, "span": [0, 1], "__ref_s3_data": null}], "text": "6", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [167.2993927001953, 690.0819091796875, 231.72227478027344, 698.99951171875], "page": 6, "span": [0, 16], "__ref_s3_data": null}], "text": "M. 
Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.94253540039062, 651.3041381835938, 480.59478759765625, 673.705078125], "page": 6, "span": [0, 132], "__ref_s3_data": null}], "text": "generation. Implicitly, this also means that Im2Seq models need to learn these complex syntax rules, simply to deliver valid output.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.64344787597656, 496.2580871582031, 480.595703125, 649.443603515625], "page": 6, "span": [0, 977], "__ref_s3_data": null}], "text": "In practice, we observe two major issues with prediction quality when training Im2Seq models on HTML table structure generation from images. On the one hand, we find that on large tables, the visual attention of the model often starts to drift and is not accurately moving forward cell by cell anymore. This manifests itself in either in an increasing location drift for proposed table-cells in later rows on the same column or even complete loss of vertical alignment, as illustrated in Figure 5. Addressing this with post-processing is partially possible, but clearly undesired. On the other hand, we find many instances of predictions with structural inconsistencies or plain invalid HTML output, as shown in Figure 6, which are nearly impossible to properly correct. 
Both problems seriously impact the TSR model performance, since they reflect not only in the task of pure structure recognition but also in the equally crucial recognition or matching of table cell content.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.07444763183594, 460.4577331542969, 372.50848388671875, 472.3045959472656], "page": 6, "span": [0, 36], "__ref_s3_data": null}], "text": "4 Optimised Table Structure Language", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.82858276367188, 350.400146484375, 480.5947265625, 443.65216064453125], "page": 6, "span": [0, 563], "__ref_s3_data": null}], "text": "To mitigate the issues with HTML in Im2Seq-based TSR models laid out before, we propose here our Optimised Table Structure Language (OTSL). OTSL is designed to express table structure with a minimized vocabulary and a simple set of rules, which are both significantly reduced compared to HTML. At the same time, OTSL enables easy error detection and correction during sequence generation. We further demonstrate how the compact structure representation and minimized sequence length improves prediction accuracy and inference time in the TableFormer architecture.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.0214385986328, 316.9593811035156, 261.80108642578125, 326.9925231933594], "page": 6, "span": [0, 23], "__ref_s3_data": null}], "text": "4.1 Language Definition", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [134.03182983398438, 269.9826354980469, 480.5887145996094, 303.5955505371094], "page": 6, "span": [0, 165], "__ref_s3_data": null}], "text": "In Figure 3, we illustrate how the OTSL is defined. 
In essence, the OTSL defines only 5 tokens that directly describe a tabular structure based on an atomic 2D grid.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [149.35653686523438, 256.95648193359375, 409.3113708496094, 266.98114013671875], "page": 6, "span": [0, 57], "__ref_s3_data": null}], "text": "The OTSL vocabulary is comprised of the following tokens:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [139.9448699951172, 235.22317504882812, 460.54443359375, 245.30445861816406], "page": 6, "span": [0, 72], "__ref_s3_data": null}], "text": "-\"C\" cell a new table cell that either has or does not have cell content", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [139.9716796875, 210.11834716796875, 480.59393310546875, 232.8718719482422], "page": 6, "span": [0, 82], "__ref_s3_data": null}], "text": "-\"L\" cell left-looking cell , merging with the left neighbor cell to create a span", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [140.17970275878906, 184.99545288085938, 480.58856201171875, 207.94252014160156], "page": 6, "span": [0, 81], "__ref_s3_data": null}], "text": "-\"U\" cell up-looking cell , merging with the upper neighbor cell to create a span", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [139.92364501953125, 172.88253784179688, 454.5549621582031, 183.41383361816406], "page": 6, "span": [0, 71], "__ref_s3_data": null}], "text": "-\"X\" cell cross cell , to merge with both left and upper neighbor cells", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [139.87696838378906, 160.93917846679688, 328.61676025390625, 170.83633422851562], "page": 6, "span": [0, 40], "__ref_s3_data": null}], "text": "-\"NL\" new-line , switch to the next row.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.19346618652344, 127.14515686035156, 480.5928039550781, 
148.89442443847656], "page": 6, "span": [0, 99], "__ref_s3_data": null}], "text": "A notable attribute of OTSL is that it has the capability of achieving lossless conversion to HTML.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [193.9747772216797, 689.7752685546875, 447.54290771484375, 698.8756103515625], "page": 7, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [475.3976135253906, 690.1593017578125, 480.59124755859375, 698.609375], "page": 7, "span": [0, 1], "__ref_s3_data": null}], "text": "7", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.8881378173828, 635.6204833984375, 480.58740234375, 667.1154174804688], "page": 7, "span": [0, 207], "__ref_s3_data": null}], "text": "Fig. 3. OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"prov": [{"bbox": [134.2874298095703, 477.7056579589844, 246.78787231445312, 487.5195007324219], "page": 7, "span": [0, 19], "__ref_s3_data": null}], "text": "4.2 Language Syntax", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [134.23097229003906, 457.80255126953125, 363.7961730957031, 467.56781005859375], "page": 7, "span": [0, 51], "__ref_s3_data": null}], "text": "The OTSL representation follows these syntax rules:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [138.97299194335938, 424.0662536621094, 480.5890197753906, 445.8700256347656], "page": 7, "span": [0, 108], "__ref_s3_data": null}], "text": "1. 
Left-looking cell rule : The left neighbour of an \"L\" cell must be either another \"L\" cell or a \"C\" cell.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.19281005859375, 400.15325927734375, 480.59228515625, 421.95819091796875], "page": 7, "span": [0, 106], "__ref_s3_data": null}], "text": "2. Up-looking cell rule : The upper neighbour of a \"U\" cell must be either another \"U\" cell or a \"C\" cell.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.06527709960938, 388.19525146484375, 226.0736083984375, 397.4916687011719], "page": 7, "span": [0, 20], "__ref_s3_data": null}], "text": "3. Cross cell rule :", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [146.40036010742188, 352.3262939453125, 480.5923767089844, 396.9922180175781], "page": 7, "span": [0, 169], "__ref_s3_data": null}], "text": ": The left neighbour of an \"X\" cell must be either another \"X\" cell or a \"U\" cell, and the upper neighbour of an \"X\" cell must be either another \"X\" cell or an \"L\" cell.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.39491271972656, 339.79541015625, 474.5901794433594, 349.8867492675781], "page": 7, "span": [0, 78], "__ref_s3_data": null}], "text": "4. First row rule : Only \"L\" cells and \"C\" cells are allowed in the first row.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.3254852294922, 316.4543151855469, 480.58746337890625, 338.0946960449219], "page": 7, "span": [0, 84], "__ref_s3_data": null}], "text": "5. First column rule : Only \"U\" cells and \"C\" cells are allowed in the first column.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.22427368164062, 292.2819519042969, 480.5945739746094, 314.491455078125], "page": 7, "span": [0, 144], "__ref_s3_data": null}], "text": "6. 
Rectangular rule : The table representation is always rectangular - all rows must have an equal number of tokens, terminated with \"NL\" token.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [133.6158447265625, 149.74966430664062, 480.5958251953125, 280.5412292480469], "page": 7, "span": [0, 848], "__ref_s3_data": null}], "text": "The application of these rules gives OTSL a set of unique properties. First of all, the OTSL enforces a strictly rectangular structure representation, where every new-line token starts a new row. As a consequence, all rows and all columns have exactly the same number of tokens, irrespective of cell spans. Secondly, the OTSL representation is unambiguous: Every table structure is represented in one way. In this representation every table cell corresponds to a \"C\"-cell token, which in case of spans is always located in the top-left corner of the table cell definition. Third, OTSL syntax rules are only backward-looking. As a consequence, every predicted token can be validated straight during sequence generation by looking at the previously predicted sequence. As such, OTSL can guarantee that every predicted sequence is syntactically valid.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.04405212402344, 126.91014099121094, 480.5926513671875, 148.8981170654297], "page": 7, "span": [0, 153], "__ref_s3_data": null}], "text": "These characteristics can be easily learned by sequence generator networks, as we demonstrate further below. 
We find strong indications that this pattern", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.1900634765625, 690.1593017578125, 139.46353149414062, 698.3311767578125], "page": 8, "span": [0, 1], "__ref_s3_data": null}], "text": "8", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [167.40870666503906, 690.0598754882812, 231.72227478027344, 699.074462890625], "page": 8, "span": [0, 16], "__ref_s3_data": null}], "text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.2002410888672, 651.7838745117188, 480.5888366699219, 673.7068481445312], "page": 8, "span": [0, 84], "__ref_s3_data": null}], "text": "reduces significantly the column drift seen in the HTML based models (see Figure 5).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.25576782226562, 620.8721313476562, 319.3470764160156, 630.8031005859375], "page": 8, "span": [0, 35], "__ref_s3_data": null}], "text": "4.3 Error-detection and -mitigation", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.90631103515625, 492.9853515625, 480.59576416015625, 610.5565185546875], "page": 8, "span": [0, 797], "__ref_s3_data": null}], "text": "The design of OTSL allows to validate a table structure easily on an unfinished sequence. The detection of an invalid sequence token is a clear indication of a prediction mistake, however a valid sequence by itself does not guarantee prediction correctness. Different heuristics can be used to correct token errors in an invalid sequence and thus increase the chances for accurate predictions. Such heuristics can be applied either after the prediction of each token, or at the end on the entire predicted sequence. 
For example a simple heuristic which can correct the predicted OTSL sequence on-the-fly is to verify if the token with the highest prediction confidence invalidates the predicted sequence, and replace it by the token with the next highest confidence until OTSL rules are satisfied.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.63143920898438, 459.85089111328125, 229.03533935546875, 471.56646728515625], "page": 8, "span": [0, 13], "__ref_s3_data": null}], "text": "5 Experiments", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.63893127441406, 339.67877197265625, 480.6024475097656, 445.8916015625], "page": 8, "span": [0, 684], "__ref_s3_data": null}], "text": "To evaluate the impact of OTSL on prediction accuracy and inference times, we conducted a series of experiments based on the TableFormer model (Figure 4) with two objectives: Firstly we evaluate the prediction quality and performance of OTSL vs. HTML after performing Hyper Parameter Optimization (HPO) on the canonical PubTabNet data set. Secondly we pick the best hyper-parameters found in the first step and evaluate how OTSL impacts the performance of TableFormer after training on other publicly available data sets (FinTabNet, PubTables-1M [14]). The ground truth (GT) from all data sets has been converted into OTSL format for this purpose, and will be made publicly available.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.0367889404297, 287.69140625, 480.5908203125, 308.2715148925781], "page": 8, "span": [0, 104], "__ref_s3_data": null}], "text": "Fig. 4. 
Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"prov": [{"bbox": [133.83853149414062, 126.85651397705078, 480.59173583984375, 172.45193481445312], "page": 8, "span": [0, 299], "__ref_s3_data": null}], "text": "We rely on standard metrics such as Tree Edit Distance score (TEDs) for table structure prediction, and Mean Average Precision (mAP) with 0.75 Intersection Over Union (IOU) threshold for the bounding-box predictions of table cells. The predicted OTSL structures were converted back to HTML format in", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [193.94395446777344, 689.7586669921875, 447.54290771484375, 698.8834228515625], "page": 9, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [474.9051818847656, 690.1593017578125, 480.59124755859375, 698.5001831054688], "page": 9, "span": [0, 1], "__ref_s3_data": null}], "text": "9", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [133.90585327148438, 640.3582153320312, 480.5957946777344, 673.7608642578125], "page": 9, "span": [0, 163], "__ref_s3_data": null}], "text": "order to compute the TED score. 
Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.28504943847656, 613.6966552734375, 318.44842529296875, 623.6006469726562], "page": 9, "span": [0, 32], "__ref_s3_data": null}], "text": "5.1 Hyper Parameter Optimization", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.80441284179688, 537.6300659179688, 481.1519775390625, 607.1452026367188], "page": 9, "span": [0, 423], "__ref_s3_data": null}], "text": "We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple and complex tables (tables with cell spans). Results are presented in Table. 1. It is evident that with OTSL, our model achieves the same TED score and slightly better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.88543701171875, 464.55596923828125, 480.59539794921875, 517.7815551757812], "page": 9, "span": [0, 398], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. 
Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [134.48985290527344, 274.2215881347656, 264.4033203125, 284.3811950683594], "page": 9, "span": [0, 24], "__ref_s3_data": null}], "text": "5.2 Quantitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.97792053222656, 174.46827697753906, 480.59576416015625, 268.4878234863281], "page": 9, "span": [0, 555], "__ref_s3_data": null}], "text": "We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained and evaluated it on three publicly available data sets: PubTabNet (395k samples), FinTabNet (113k samples) and PubTables-1M (about 1M samples). Performance results are presented in Table. 2. It is clearly evident that the model trained on OTSL outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.90371704101562, 126.73831176757812, 480.6639099121094, 172.7313995361328], "page": 9, "span": [0, 289], "__ref_s3_data": null}], "text": "Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. 
Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.6792755126953, 690.1593017578125, 144.2487335205078, 698.4376831054688], "page": 10, "span": [0, 2], "__ref_s3_data": null}], "text": "10", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [167.2496337890625, 690.1593017578125, 231.72048950195312, 699.0352783203125], "page": 10, "span": [0, 16], "__ref_s3_data": null}], "text": "M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.00595092773438, 645.5076904296875, 480.59356689453125, 677.1614379882812], "page": 10, "span": [0, 192], "__ref_s3_data": null}], "text": "Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8).", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"prov": [{"bbox": [134.25314331054688, 493.7161560058594, 257.19561767578125, 503.76678466796875], "page": 10, "span": [0, 23], "__ref_s3_data": null}], "text": "5.3 Qualitative Results", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.7931365966797, 425.5223083496094, 480.6096496582031, 483.0732421875], "page": 10, "span": [0, 309], "__ref_s3_data": null}], "text": "To illustrate the qualitative differences between OTSL and HTML, Figure 5 demonstrates less overlap and more accurate bounding boxes with OTSL. 
In Figure 6, OTSL proves to be more effective in handling tables with longer token sequences, resulting in even more precise structure prediction and bounding boxes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.934326171875, 352.2828369140625, 480.591064453125, 395.2126770019531], "page": 10, "span": [0, 270], "__ref_s3_data": null}], "text": "Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). \"PMC2807444_006_00.png\" PubTabNet. \u03bc", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"prov": [{"bbox": [227.91465759277344, 116.65360260009766, 230.10028076171875, 126.1739730834961], "page": 10, "span": [0, 1], "__ref_s3_data": null}], "text": "\u03bc", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [300.58056640625, 98.57134246826172, 302.72637939453125, 108.3780517578125], "page": 10, "span": [0, 1], "__ref_s3_data": null}], "text": "\u2265", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [194.172119140625, 689.804443359375, 447.54290771484375, 698.850830078125], "page": 11, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [471.22021484375, 690.1593017578125, 480.5894775390625, 698.3983154296875], "page": 11, "span": [0, 2], "__ref_s3_data": null}], "text": "11", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.00157165527344, 613.6331176757812, 480.82830810546875, 667.0059814453125], "page": 11, "span": [0, 390], "__ref_s3_data": null}], "text": "Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. 
The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. \"PMC5406406_003_01.png\" PubTabNet.", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"prov": [{"bbox": [134.69354248046875, 690.152099609375, 231.72048950195312, 698.9852905273438], "page": 12, "span": [0, 19], "__ref_s3_data": null}], "text": "12 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.32138061523438, 663.8826293945312, 219.25479125976562, 675.0826416015625], "page": 12, "span": [0, 12], "__ref_s3_data": null}], "text": "6 Conclusion", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [134.07997131347656, 588.5181884765625, 480.595703125, 645.8515014648438], "page": 12, "span": [0, 330], "__ref_s3_data": null}], "text": "We demonstrated that representing tables in HTML for the task of table structure recognition with Im2Seq models is ill-suited and has serious limitations. Furthermore, we presented in this paper an Optimized Table Structure Language (OTSL) which, when compared to commonly used general purpose languages, has several key benefits.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.63015747070312, 467.4183654785156, 480.6451416015625, 585.736328125], "page": 12, "span": [0, 724], "__ref_s3_data": null}], "text": "First and foremost, given the same network configuration, inference time for a table-structure prediction is about 2 times faster compared to the conventional HTML approach. This is primarily owed to the shorter sequence length of the OTSL representation. Additional performance benefits can be obtained with HPO (hyper parameter optimization). 
As we demonstrate in our experiments, models trained on OTSL can be significantly smaller, e.g. by reducing the number of encoder and decoder layers, while preserving comparatively good prediction quality. This can further improve inference performance, yielding 5-6 times faster inference speed in OTSL with prediction quality comparable to models trained on HTML (see Table 1).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [133.8241424560547, 323.7073974609375, 480.5948181152344, 465.1226806640625], "page": 12, "span": [0, 926], "__ref_s3_data": null}], "text": "Secondly, OTSL has more inherent structure and a significantly restricted vocabulary size. This allows autoregressive models to perform better in the TED metric, but especially with regards to prediction accuracy of the table-cell bounding boxes (see Table 2). As shown in Figure 5, we observe that the OTSL drastically reduces the drift for table cell bounding boxes at high row count and in sparse tables. This leads to more accurate predictions and a significant reduction in post-processing complexity, which is an undesired necessity in HTML-based Im2Seq models. Significant novelty lies in OTSL syntactical rules, which are few, simple and always backwards looking. Each new token can be validated only by analyzing the sequence of previous tokens, without requiring the entire sequence to detect mistakes. This in return allows to perform structural error detection and correction on-the-fly during sequence generation.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [134.31680297851562, 287.61077880859375, 197.68641662597656, 298.98321533203125], "page": 12, "span": [0, 10], "__ref_s3_data": null}], "text": "References", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [139.37100219726562, 227.38706970214844, 480.5920104980469, 269.8235168457031], "page": 12, "span": [0, 270], "__ref_s3_data": null}], "text": "1. 
Auer, C., Dolfi, M., Carvalho, A., Ramis, C.B., Staar, P.W.J.: Delivering document conversion as a cloud service with high throughput and responsiveness. CoRR abs/2206.00785 (2022). https://doi.org/10.48550/arXiv.2206.00785 , https://doi.org/10.48550/arXiv.2206.00785", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.86715698242188, 182.8286590576172, 480.6174011230469, 225.87879943847656], "page": 12, "span": [0, 301], "__ref_s3_data": null}], "text": "2. Chen, B., Peng, D., Zhang, J., Ren, Y., Jin, L.: Complex table structure recognition in the wild using transformer and identity matrix-based augmentation. In: Porwal, U., Forn\u00e9s, A., Shafait, F. (eds.) Frontiers in Handwriting Recognition. pp. 545561. Springer International Publishing, Cham (2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.72738647460938, 160.16236877441406, 480.5873107910156, 181.41339111328125], "page": 12, "span": [0, 140], "__ref_s3_data": null}], "text": "3. Chi, Z., Huang, H., Xu, H.D., Yu, H., Yin, W., Mao, X.L.: Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.9593963623047, 126.65552520751953, 480.5882568359375, 157.8516387939453], "page": 12, "span": [0, 204], "__ref_s3_data": null}], "text": "4. Deng, Y., Rosenberg, D., Mann, G.: Challenges in end-to-end neural scientific table recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 894-901. 
IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [194.0724639892578, 689.6328735351562, 447.54290771484375, 698.8519287109375], "page": 13, "span": [0, 60], "__ref_s3_data": null}], "text": "Optimized Table Tokenization for Table Structure Recognition", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [471.1661376953125, 690.1593017578125, 480.5894775390625, 698.4201049804688], "page": 13, "span": [0, 2], "__ref_s3_data": null}], "text": "13", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [138.6960906982422, 641.0914306640625, 480.59478759765625, 672.9320068359375], "page": 13, "span": [0, 203], "__ref_s3_data": null}], "text": "5. Kayal, P., Anand, M., Desai, H., Singh, M.: Tables to latex: structure and content extraction from scientific tables. International Journal on Document Analysis and Recognition (IJDAR) pp. 1-10 (2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.54495239257812, 598.4913940429688, 480.7531433105469, 640.2967529296875], "page": 13, "span": [0, 264], "__ref_s3_data": null}], "text": "6. Lee, E., Kwon, J., Yang, H., Park, J., Lee, S., Koo, H.I., Cho, N.I.: Table structure recognition based on grid shape graph. In: 2022 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). pp. 18681873. IEEE (2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [139.07086181640625, 576.4161376953125, 480.5901184082031, 596.6123046875], "page": 13, "span": [0, 131], "__ref_s3_data": null}], "text": "7. 
Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: A benchmark dataset for table detection and recognition (2019)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.5443878173828, 521.7116088867188, 480.8269348144531, 574.5029296875], "page": 13, "span": [0, 345], "__ref_s3_data": null}], "text": "8. Livathinos, N., Berrospi, C., Lysak, M., Kuropiatnyk, V., Nassar, A., Carvalho, A., Dolfi, M., Auer, C., Dinkla, K., Staar, P.: Robust pdf document conversion using recurrent neural networks. Proceedings of the AAAI Conference on Artificial Intelligence 35 (17), 15137-15145 (May 2021), https://ojs.aaai.org/index.php/ AAAI/article/view/17777", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [138.21878051757812, 487.909423828125, 480.5938720703125, 519.8042602539062], "page": 13, "span": [0, 234], "__ref_s3_data": null}], "text": "9. Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: Table structure understanding with transformers. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4614-4623 (June 2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.7440185546875, 422.8146057128906, 480.6158447265625, 486.7056579589844], "page": 13, "span": [0, 413], "__ref_s3_data": null}], "text": "10. Pfitzmann, B., Auer, C., Dolfi, M., Nassar, A.S., Staar, P.W.J.: Doclaynet: A large human-annotated dataset for document-layout segmentation. In: Zhang, A., Rangwala, H. (eds.) KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 14 - 18, 2022. pp. 3743-3751. ACM (2022). 
https://doi.org/10.1145/3534678.3539043 , https:// doi.org/10.1145/3534678.3539043", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.48020935058594, 378.9383850097656, 480.59295654296875, 421.14239501953125], "page": 13, "span": [0, 295], "__ref_s3_data": null}], "text": "11. Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from imagebased documents. In: Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops. pp. 572-573 (2020)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.6136016845703, 334.68109130859375, 480.6297302246094, 377.08355712890625], "page": 13, "span": [0, 281], "__ref_s3_data": null}], "text": "12. Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162-1167. IEEE (2017)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.72238159179688, 290.7889099121094, 480.75555419921875, 333.61895751953125], "page": 13, "span": [0, 275], "__ref_s3_data": null}], "text": "13. Siddiqui, S.A., Fateh, I.A., Rizvi, S.T.R., Dengel, A., Ahmed, S.: Deeptabstr: Deep learning based table structure recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1403-1409 (2019). https:// doi.org/10.1109/ICDAR.2019.00226", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.3740997314453, 247.3230743408203, 480.5928649902344, 289.9039306640625], "page": 13, "span": [0, 241], "__ref_s3_data": null}], "text": "14. Smock, B., Pesala, R., Abraham, R.: PubTables-1M: Towards comprehensive table extraction from unstructured documents. 
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4634-4642 (June 2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.6051483154297, 181.90472412109375, 480.6208190917969, 245.70274353027344], "page": 13, "span": [0, 405], "__ref_s3_data": null}], "text": "15. Staar, P.W.J., Dolfi, M., Auer, C., Bekas, C.: Corpus conversion service: A machine learning platform to ingest documents at scale. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. pp. 774-782. KDD '18, Association for Computing Machinery, New York, NY, USA (2018). https://doi.org/10.1145/3219819.3219834 , https://doi.org/10. 1145/3219819.3219834", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.76400756835938, 159.9412841796875, 480.5954284667969, 179.845703125], "page": 13, "span": [0, 96], "__ref_s3_data": null}], "text": "16. Wang, X.: Tabular Abstraction, Editing, and Formatting. Ph.D. thesis, CAN (1996), aAINN09397", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.76400756835938, 126.6559829711914, 480.5911865234375, 157.7118377685547], "page": 13, "span": [0, 195], "__ref_s3_data": null}], "text": "17. Xue, W., Li, Q., Tao, D.: Res2tim: Reconstruct syntactic structures from table images. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 749-755. IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.76499938964844, 690.1593017578125, 231.72048950195312, 699.0250244140625], "page": 14, "span": [0, 19], "__ref_s3_data": null}], "text": "14 M. Lysak, et al.", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [134.63540649414062, 641.2738647460938, 480.59112548828125, 673.007568359375], "page": 14, "span": [0, 223], "__ref_s3_data": null}], "text": "18. 
Xue, W., Yu, B., Wang, W., Tao, D., Li, Q.: Tgrnet: A table graph reconstruction network for table structure recognition. In: Proceedings of the IEEE/CVF International Conference on Computer Vision. pp. 1295-1304 (2021)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.76499938964844, 598.3690795898438, 480.9535217285156, 640.1014404296875], "page": 14, "span": [0, 269], "__ref_s3_data": null}], "text": "19. Ye, J., Qi, X., He, Y., Chen, Y., Gu, D., Gao, P., Xiao, R.: Pingan-vcgroup's solution for icdar 2021 competition on scientific literature parsing task b: Table recognition to html (2021). https://doi.org/10.48550/ARXIV.2105.01848 , https://arxiv.org/abs/2105.01848", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.35293579101562, 576.3993530273438, 480.5935363769531, 596.5462036132812], "page": 14, "span": [0, 147], "__ref_s3_data": null}], "text": "20. Zhang, Z., Zhang, J., Du, J., Wang, F.: Split, embed and merge: An accurate table structure recognizer. Pattern Recognition 126 , 108565 (2022)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.2264862060547, 521.74560546875, 480.8044738769531, 574.3355712890625], "page": 14, "span": [0, 329], "__ref_s3_data": null}], "text": "21. Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV). pp. 697-706 (2021). https://doi.org/10.1109/WACV48630.2021. 00074", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [133.99171447753906, 477.6664123535156, 480.5955810546875, 519.9246826171875], "page": 14, "span": [0, 259], "__ref_s3_data": null}], "text": "22. Zhong, X., ShafieiBavani, E., Jimeno Yepes, A.: Image-based table recognition: Data, model, and evaluation. 
In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) Computer Vision - ECCV 2020. pp. 564-580. Springer International Publishing, Cham (2020)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [134.23336791992188, 444.7017822265625, 480.59454345703125, 475.69757080078125], "page": 14, "span": [0, 206], "__ref_s3_data": null}], "text": "23. Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (2019)", "type": "paragraph", "name": "List-item", "font": null}], "figures": [{"prov": [{"bbox": [150.0213623046875, 366.15130615234375, 464.4815673828125, 583.114990234375], "page": 2, "span": [0, 574], "__ref_s3_data": null}], "text": "Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, (B) minimal graphical representation of table structure using rectangular layout, (C) HTML representation, (D) OTSL representation. This example demonstrates many of the key-features of OTSL, namely its reduced vocabulary size (12 versus 5 in this case), its reduced sequence length (55 versus 30) and a enhanced internal structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL).", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [137.5374755859375, 452.4152526855469, 476.1513366699219, 562.9699096679688], "page": 5, "span": [0, 73], "__ref_s3_data": null}], "text": "Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [164.22023010253906, 511.6170959472656, 448.9761047363281, 628.123291015625], "page": 7, "span": [0, 207], "__ref_s3_data": null}], "text": "Fig. 3. 
OTSL description of table structure: A - table example; B - graphical representation of table structure; C - mapping structure on a grid; D - OTSL structure encoding; E - explanation on cell encoding", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.4298095703125, 197.92733764648438, 472.34527587890625, 285.1344299316406], "page": 8, "span": [0, 104], "__ref_s3_data": null}], "text": "Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach.", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [162.900146484375, 128.48397827148438, 451.3374328613281, 348.21990966796875], "page": 10, "span": [0, 270], "__ref_s3_data": null}], "text": "Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap (E) than the HTML model (D), when predicting the structure of a sparse table (A), at twice the inference speed because of shorter sequence length (B),(C). \"PMC2807444_006_00.png\" PubTabNet. \u03bc", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [168.26930236816406, 157.55677795410156, 447.7568664550781, 609.8697509765625], "page": 11, "span": [0, 390], "__ref_s3_data": null}], "text": "Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. The OTSL model (B) captured repeating pattern of horizontally merged cells from the GT (A), unlike the HTML model (C). The HTML model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. \"PMC5406406_003_01.png\" PubTabNet.", "type": "figure", "bounding-box": null}], "tables": [{"prov": [{"bbox": [139.82040405273438, 322.2669982910156, 474.80023193359375, 454.9158935546875], "page": 9, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. 
Effects of reducing the # of layers in encoder and decoder stages of the model show that smaller models trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.", "type": "table", "#-cols": 8, "#-rows": 7, "data": [[{"bbox": [160.3699951171875, 442.1952819824219, 168.0479278564453, 450.2650451660156], "spans": [[0, 0]], "text": "#", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [207.9739990234375, 442.1952819824219, 215.6519317626953, 450.2650451660156], "spans": [[0, 1]], "text": "#", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [239.79800415039062, 436.7162780761719, 278.3176574707031, 444.7860412597656], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [324.6700134277344, 442.1952819824219, 348.2641906738281, 450.2650451660156], "spans": [[0, 3], [0, 4], [0, 5]], "text": "TEDs", "type": "col_header", "col": 5, "col-header": false, "col-span": [3, 6], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [396.27099609375, 442.1952819824219, 417.1268310546875, 450.2650451660156], "spans": [[0, 6]], "text": "mAP", "type": "col_header", "col": 6, 
"col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [430.77099609375, 442.1952819824219, 467.1423034667969, 450.2650451660156], "spans": [[0, 7]], "text": "Inference", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [144.5919952392578, 429.2442932128906, 183.82806396484375, 437.3140563964844], "spans": [[1, 0]], "text": "enc-layers", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [192.1949920654297, 429.2442932128906, 231.43106079101562, 437.3140563964844], "spans": [[1, 1]], "text": "dec-layers", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [239.79800415039062, 436.7162780761719, 278.3176574707031, 444.7860412597656], "spans": [[0, 2], [1, 2]], "text": "Language", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [286.6860046386719, 429.2442932128906, 312.3326110839844, 437.3140563964844], "spans": [[1, 3]], "text": "simple", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [320.7019958496094, 429.2442932128906, 353.7198791503906, 437.3140563964844], "spans": [[1, 4]], "text": "complex", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [369.3059997558594, 429.2442932128906, 379.03094482421875, 437.3140563964844], "spans": [[1, 5]], "text": "all", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [394.927001953125, 431.2362976074219, 418.4727783203125, 439.3060607910156], "spans": [[1, 6]], "text": "(0.75)", "type": 
"col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.14801025390625, 431.2362976074219, 470.76055908203125, 439.3060607910156], "spans": [[1, 7]], "text": "time (secs)", "type": "col_header", "col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [161.906005859375, 410.4142761230469, 166.512939453125, 418.4840393066406], "spans": [[2, 0]], "text": "6", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [209.50900268554688, 410.4142761230469, 214.11593627929688, 418.4840393066406], "spans": [[2, 1]], "text": "6", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [245.17599487304688, 402.9422912597656, 272.9395446777344, 423.96405029296875], "spans": [[2, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.0169982910156, 402.9422912597656, 310.0037536621094, 423.96405029296875], "spans": [[2, 3]], "text": "0.965 0.969", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [326.7170104980469, 402.9422912597656, 347.7037658691406, 423.96405029296875], "spans": [[2, 4]], "text": "0.934 0.927", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [363.6759948730469, 402.9422912597656, 384.6627502441406, 423.96405029296875], "spans": [[2, 5]], "text": "0.955 0.955", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [396.20599365234375, 402.9422912597656, 417.1927490234375, 424.0268249511719], "spans": [[2, 6]], "text": "0.88 0.857", "type": "body", "col": 
6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [439.5270080566406, 402.9422912597656, 458.3842468261719, 424.0268249511719], "spans": [[2, 7]], "text": "2.73 5.39", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.906005859375, 384.11328125, 166.512939453125, 392.18304443359375], "spans": [[3, 0]], "text": "4", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [209.50900268554688, 384.11328125, 214.11593627929688, 392.18304443359375], "spans": [[3, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [245.17599487304688, 376.64129638671875, 272.9395446777344, 397.66204833984375], "spans": [[3, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.0169982910156, 376.64129638671875, 310.0037536621094, 397.66204833984375], "spans": [[3, 3]], "text": "0.938 0.952", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [326.7170104980469, 376.64129638671875, 347.7037658691406, 397.66204833984375], "spans": [[3, 4]], "text": "0.904 0.909", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [363.6759948730469, 389.59228515625, 384.6627502441406, 397.66204833984375], "spans": [[3, 5]], "text": "0.927", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [394.6180114746094, 389.79852294921875, 418.77886962890625, 397.7248229980469], "spans": [[3, 6]], "text": "0.853", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], 
"row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [439.5270080566406, 389.79852294921875, 458.3842468261719, 397.7248229980469], "spans": [[3, 7]], "text": "1.97", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [161.906005859375, 357.8122863769531, 166.512939453125, 365.8820495605469], "spans": [[4, 0]], "text": "2", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [209.50900268554688, 357.8122863769531, 214.11593627929688, 365.8820495605469], "spans": [[4, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [245.17599487304688, 350.3403015136719, 272.9395446777344, 371.3610534667969], "spans": [[4, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.0169982910156, 363.2912902832031, 310.0037536621094, 371.3610534667969], "spans": [[4, 3]], "text": "0.923", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [326.7170104980469, 350.3403015136719, 347.7037658691406, 371.3610534667969], "spans": [[4, 4]], "text": "0.897 0.901", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [362.0880126953125, 363.2912902832031, 386.2488708496094, 384.7738342285156], "spans": [[4, 5]], "text": "0.938 0.915", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [396.20599365234375, 376.64129638671875, 417.1927490234375, 384.7110595703125], "spans": [[4, 6]], "text": "0.843", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 
5]}, {"bbox": [440.7669982910156, 376.64129638671875, 457.1468200683594, 384.7110595703125], "spans": [[4, 7]], "text": "3.77", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": null, "spans": [[5, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [289.0169982910156, 350.3403015136719, 310.0037536621094, 358.4100646972656], "spans": [[5, 3]], "text": "0.945", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [362.0880126953125, 350.5465393066406, 386.2488708496094, 358.47283935546875], "spans": [[5, 5]], "text": "0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [394.6180114746094, 350.3403015136719, 418.77886962890625, 371.423828125], "spans": [[5, 6]], "text": "0.859 0.834", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [439.5270080566406, 350.3403015136719, 458.3842468261719, 371.423828125], "spans": [[5, 7]], "text": "1.91 3.81", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.906005859375, 331.5102844238281, 166.512939453125, 339.5800476074219], "spans": [[6, 0]], "text": "4", 
"type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [209.50900268554688, 331.5102844238281, 214.11593627929688, 339.5800476074219], "spans": [[6, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [245.17599487304688, 324.0382995605469, 272.9395446777344, 345.06005859375], "spans": [[6, 2]], "text": "OTSL HTML", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [289.0169982910156, 324.0382995605469, 310.0037536621094, 345.06005859375], "spans": [[6, 3]], "text": "0.952 0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [326.7170104980469, 324.0382995605469, 347.7037658691406, 345.06005859375], "spans": [[6, 4]], "text": "0.92 0.903", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [362.0880126953125, 324.0382995605469, 386.2488708496094, 345.1228332519531], "spans": [[6, 5]], "text": "0.942 0.931", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [394.6180114746094, 324.0382995605469, 418.77886962890625, 345.1228332519531], "spans": [[6, 6]], "text": "0.857 0.824", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [439.5270080566406, 324.0382995605469, 458.3842468261719, 345.1228332519531], "spans": [[6, 7]], "text": "1.22 2", "type": "body", "col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [143.81715393066406, 528.7755126953125, 470.8412170410156, 635.86865234375], "page": 10, "span": [0, 0], 
"__ref_s3_data": null}], "text": "Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using TableFormer [9] (with enc=6, dec=6, heads=8).", "type": "table", "#-cols": 7, "#-rows": 8, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [215.52499389648438, 617.3963012695312, 254.04464721679688, 625.4660034179688], "spans": [[0, 1], [1, 1]], "text": "Language", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 3, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [300.3970031738281, 622.851318359375, 323.9911804199219, 630.9210205078125], "spans": [[0, 2], [0, 3], [0, 4]], "text": "TEDs", "type": "col_header", "col": 4, "col-header": false, "col-span": [2, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [370.3450012207031, 617.371337890625, 414.7466125488281, 625.4410400390625], "spans": [[0, 5], [1, 5]], "text": "mAP(0.75)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, "row-span": [0, 2]}, {"bbox": [423.114013671875, 611.892333984375, 466.7265625, 630.9210205078125], "spans": [[0, 6], [1, 6]], "text": "Inference time (secs)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 0, "row-header": false, 
"row-span": [0, 2]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [215.52499389648438, 617.3963012695312, 254.04464721679688, 625.4660034179688], "spans": [[0, 1], [1, 1]], "text": "Language", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [262.4129943847656, 609.8992919921875, 288.0596008300781, 617.968994140625], "spans": [[1, 2]], "text": "simple", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [296.4289855957031, 609.8992919921875, 329.4468688964844, 617.968994140625], "spans": [[1, 3]], "text": "complex", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [345.0329895019531, 609.8992919921875, 354.7579345703125, 617.968994140625], "spans": [[1, 4]], "text": "all", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [370.3450012207031, 617.371337890625, 414.7466125488281, 625.4410400390625], "spans": [[0, 5], [1, 5]], "text": "mAP(0.75)", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [0, 2]}, {"bbox": [423.114013671875, 611.892333984375, 466.7265625, 630.9210205078125], "spans": [[0, 6], [1, 6]], "text": "Inference time (secs)", "type": "col_header", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [0, 2]}], [{"bbox": [154.53799438476562, 591.0703125, 201.2412872314453, 599.1400146484375], "spans": [[2, 0], [3, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 4]}, {"bbox": 
[222.43699645996094, 596.54931640625, 247.13226318359375, 604.6190185546875], "spans": [[2, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [264.7439880371094, 596.54931640625, 285.7307434082031, 604.6190185546875], "spans": [[2, 2]], "text": "0.965", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [302.4440002441406, 596.54931640625, 323.4307556152344, 604.6190185546875], "spans": [[2, 3]], "text": "0.934", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [339.40301513671875, 596.54931640625, 360.3897705078125, 604.6190185546875], "spans": [[2, 4]], "text": "0.955", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [383.1159973144531, 596.7554931640625, 401.9732360839844, 604.6818237304688], "spans": [[2, 5]], "text": "0.88", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [435.4930114746094, 596.7554931640625, 454.3502502441406, 604.6818237304688], "spans": [[2, 6]], "text": "2.73", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [154.53799438476562, 591.0703125, 201.2412872314453, 599.1400146484375], "spans": [[2, 0], [3, 0]], "text": "PubTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [2, 4]}, {"bbox": [220.9029998779297, 583.5983276367188, 248.66656494140625, 591.6680297851562], "spans": [[3, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [264.7439880371094, 583.5983276367188, 
285.7307434082031, 591.6680297851562], "spans": [[3, 2]], "text": "0.969", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [302.4440002441406, 583.5983276367188, 323.4307556152344, 591.6680297851562], "spans": [[3, 3]], "text": "0.927", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [339.40301513671875, 583.5983276367188, 360.3897705078125, 591.6680297851562], "spans": [[3, 4]], "text": "0.955", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [382.052001953125, 583.5983276367188, 403.03875732421875, 591.6680297851562], "spans": [[3, 5]], "text": "0.857", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [436.73199462890625, 583.5983276367188, 453.11181640625, 591.6680297851562], "spans": [[3, 6]], "text": "5.39", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [155.94500732421875, 564.768310546875, 199.833740234375, 572.8380126953125], "spans": [[4, 0], [5, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 6]}, {"bbox": [222.43699645996094, 570.248291015625, 247.13226318359375, 578.3179931640625], "spans": [[4, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [264.7439880371094, 570.248291015625, 285.7307434082031, 578.3179931640625], "spans": [[4, 2]], "text": "0.955", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [302.4440002441406, 570.248291015625, 323.4307556152344, 578.3179931640625], 
"spans": [[4, 3]], "text": "0.961", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [337.81500244140625, 570.4544677734375, 361.9758605957031, 578.3807983398438], "spans": [[4, 4]], "text": "0.959", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [380.4639892578125, 570.4544677734375, 404.6248474121094, 578.3807983398438], "spans": [[4, 5]], "text": "0.862", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [435.4930114746094, 570.4544677734375, 454.3502502441406, 578.3807983398438], "spans": [[4, 6]], "text": "1.85", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [155.94500732421875, 564.768310546875, 199.833740234375, 572.8380126953125], "spans": [[4, 0], [5, 0]], "text": "FinTabNet", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [4, 6]}, {"bbox": [220.9029998779297, 557.2963256835938, 248.66656494140625, 565.3660278320312], "spans": [[5, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [264.7439880371094, 557.2963256835938, 285.7307434082031, 565.3660278320312], "spans": [[5, 2]], "text": "0.917", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [302.4440002441406, 557.2963256835938, 323.4307556152344, 565.3660278320312], "spans": [[5, 3]], "text": "0.922", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [341.70599365234375, 557.2963256835938, 358.0858154296875, 565.3660278320312], "spans": [[5, 4]], "text": "0.92", 
"type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [382.052001953125, 557.2963256835938, 403.03875732421875, 565.3660278320312], "spans": [[5, 5]], "text": "0.722", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [436.73199462890625, 557.2963256835938, 453.11181640625, 565.3660278320312], "spans": [[5, 6]], "text": "3.26", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [148.62600708007812, 538.4673461914062, 207.15240478515625, 546.5370483398438], "spans": [[6, 0], [7, 0]], "text": "PubTables-1M", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 8]}, {"bbox": [222.43699645996094, 543.9473266601562, 247.13226318359375, 552.0170288085938], "spans": [[6, 1]], "text": "OTSL", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [264.7439880371094, 543.9473266601562, 285.7307434082031, 552.0170288085938], "spans": [[6, 2]], "text": "0.987", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [302.4440002441406, 543.9473266601562, 323.4307556152344, 552.0170288085938], "spans": [[6, 3]], "text": "0.964", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [337.81500244140625, 544.1535034179688, 361.9758605957031, 552.079833984375], "spans": [[6, 4]], "text": "0.977", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [380.4639892578125, 544.1535034179688, 404.6248474121094, 552.079833984375], "spans": [[6, 5]], "text": "0.896", "type": "body", "col": 5, 
"col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [435.4930114746094, 544.1535034179688, 454.3502502441406, 552.079833984375], "spans": [[6, 6]], "text": "1.79", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [148.62600708007812, 538.4673461914062, 207.15240478515625, 546.5370483398438], "spans": [[6, 0], [7, 0]], "text": "PubTables-1M", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [6, 8]}, {"bbox": [220.9029998779297, 530.9953002929688, 248.66656494140625, 539.0650024414062], "spans": [[7, 1]], "text": "HTML", "type": "row_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [264.7439880371094, 530.9953002929688, 285.7307434082031, 539.0650024414062], "spans": [[7, 2]], "text": "0.983", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [302.4440002441406, 530.9953002929688, 323.4307556152344, 539.0650024414062], "spans": [[7, 3]], "text": "0.944", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [339.40301513671875, 530.9953002929688, 360.3897705078125, 539.0650024414062], "spans": [[7, 4]], "text": "0.966", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [382.052001953125, 530.9953002929688, 403.03875732421875, 539.0650024414062], "spans": [[7, 5]], "text": "0.889", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [436.73199462890625, 530.9953002929688, 453.11181640625, 539.0650024414062], "spans": [[7, 6]], "text": "3.26", "type": "body", "col": 6, "col-header": false, "col-span": 
[6, 7], "row": 7, "row-header": false, "row-span": [7, 8]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/redp5110.doctags.txt b/tests/data/redp5110.doctags.txt new file mode 100644 index 00000000..c830a72f --- /dev/null +++ b/tests/data/redp5110.doctags.txt @@ -0,0 +1,1843 @@ + +Front cover +
+ +
+Row and Column Access Control Support in IBM DB2 for i +
+ +
+
+ +
+
+ +
+International Technical Support Organization +Row and Column Access Control Support in IBM DB2 for i +November 2014 +Note: Before using this information and the product it supports, read the information in "Notices" on page vii. +First Edition (November 2014) +This edition applies to Version 7, Release 2 of IBM i (product number 5770-SS1). +' Copyright International Business Machines Corporation 2014. All rights reserved. +Note to U.S. Government Users Restricted Rights -- Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp. +Contents + + +Notices. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vii +Trademarks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . viii +DB2 for i Center of Excellence. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ix +Preface. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xi +Authors . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xi +Now you can become a published author, too!. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiii +Comments welcome. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .xiii +Stay connected to IBM Redbooks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiv +Chapter 1. Securing and protecting IBM DB2 data . . . . . . . . . . . . . . . . . . . . . . . . . . . . .1 +1.1 Security fundamentals. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 +1.2 Current state of IBM i security . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . .2 +1.3 DB2 for i security controls . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 +1.3.1 Existing row and column control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .4 +1.3.2 New controls: Row and Column Access Control. . . . . . . . . . . . . . . . . . . . . . . . . . .5 +Chapter 2. Roles and separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .7 +2.1 Roles . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .8 +2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA . . . . . . . . . . .8 +2.1.2 Toolbox application server access: QIBM_DB_ZDA. . . . . . . . . . . . . . . . . . . . . . . .8 +2.1.3 Database Administrator function: QIBM_DB_SQLADM . . . . . . . . . . . . . . . . . . . . .9 +2.1.4 Database Information function: QIBM_DB_SYSMON. . . . . . . . . . . . . . . . . . . . . . 9 +2.1.5 Security Administrator function: QIBM_DB_SECADM . . . . . . . . . . . . . . . . . . . . . .9 +2.1.6 Change Function Usage CL command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .10 +2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view . . . . .10 +2.2 Separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10 +Chapter 3. Row and Column Access Control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .13 +3.1 Explanation of RCAC and the concept of access control . . . . . . . . . . . . . . . . . . . . . . .14 +3.1.1 Row permission and column mask definitions. . . . . . . . . . . . . . . . . . . . . . . . . . . 14 +3.1.2 Enabling and activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .16 +3.2 Special registers and built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.18 +3.2.1 Special registers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .18 +3.2.2 Built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .19 +3.3 VERIFY_GROUP_FOR_USER function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .20 +3.4 Establishing and controlling accessibility by using the RCAC rule text . . . . . . . . . . . . .21 +3.5 SELECT, INSERT, and UPDATE behavior with RCAC. . . . . . . . . . . . . . . . . . . . . . . . 22 +3.6 Human resources example . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .22 +3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants. . . . . . . . . . . .23 +3.6.2 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . .23 +3.6.3 Demonstrating data access without RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .24 +3.6.4 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .25 +3.6.5 Defining and creating column masks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26 +3.6.6 Activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .28 +3.6.7 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .29 +3.6.8 Demonstrating data access with a view and RCAC . . . . . . . . . . . . . . . . . . . . . . .32 +
+ + +Chapter 4. Implementing Row and Column Access Control: Banking example . . . . .37 +4.1 Business requirements for the RCAC banking scenario . . . . . . . . . . . . . . . . . . . . . . . .38 +4.2 Description of the users roles and responsibilities . . . . . . . . . . . . . . . . . . . . . . . . . . . .39 +4.3 Implementation of RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .42 +4.3.1 Reviewing the tables that are used in this example. . . . . . . . . . . . . . . . . . . . . . . 42 +4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group. . 47 +4.3.3 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . .50 +. . . . . . . . . . . . . . . . . . . . . 52 +4.3.5 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .54 +4.3.6 Defining and creating column masks58 +. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.7 Restricting the inserting and updating of masked data . . . . . . . . . . . . . . . . . . . . .60 +79 +4.3.8 Activating row and column access control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.9 Reviewing row permissions. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .64 +4.3.10 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .66 +4.3.11 Query implementation with RCAC activated . . . . . . . . . . . . . . . . . . . . . . . . . . . .75 +Chapter 5. RCAC and non-SQL interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .80 +5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +81 +5.4 System CL commands considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .82 +5.4.1 Create Duplicate Object (CRTDUPOBJ) command . . . . . . . . . . . . . . . . . . . . . . .82 +82 +5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +83 +Chapter 6. Additional considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .108 +6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +. . . . . . . . . . . . . . . . . . . . . . .88 +6.2.1 Effects when RCAC is defined on the source table6.2.1 Effects when RCAC is defined on the source table +6.2.3 Effects when RCAC is defined on both source and target tables . . . . . . . . . . . . . 6.3 RCAC effects on joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .90 91 +6.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 926.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . 92 +6.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 946.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +6.3.3 Exception joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .96 +6.4 Monitoring, analyzing, and debugging with RCAC97 +. . . . . . . . . . . . . . . . . . . . . . . . . . . .Query monitoring and analysis tools . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 +6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +6.5 Views, materialized query tables, and query rewrite with RCAC . . . . . . . . . . . . . . . .102 +Materialized query tables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .103 +6.5.2105 +6.5.3 Query rewrite6.5.3 Query rewrite +6.6 RCAC effects on performance and scalability. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .105 +107 +6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
+108 +109 109 +110 +111 +Chapter 7. Row and Column Access Control management . . . . . . . . . . . . . . . . . . . .113 +
+ + +7.1 Managing row permissions and column masks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .114 +7.1.1 Source management. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .114 +7.1.2 Modifying definitions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .114 +7.1.3 Turning on and off . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .114 +7.1.4 Regenerating. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 114 +7.2 Managing tables with row permissions and column masks. . . . . . . . . . . . . . . . . . . . .115 +7.2.1 Save and restore. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .115 +7.2.2 Table migration . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .116 +7.3 Monitoring and auditing function usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .117 +Chapter 8. Designing and planning for success. . . . . . . . . . . . . . . . . . . . . . . . . . . . . 119 +8.1 Implementing RCAC with good design and proper planning. . . . . . . . . . . . . . . . . . . 120 +8.2 DB2 for i Center of Excellence . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .120 +Appendix A. Database definitions for the RCAC banking example. . . . . . . . . . . . . . 121 +Related publications. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127 +Other publications. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127 +Online resources. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127 +Help from IBM. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . 128 +
+Notices +This information was developed for products and services offered in the U.S.A. +IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service. +IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. You can send license inquiries, in writing, to: +IBM Director of Licensing, IBM Corporation, North Castle Drive, Armonk, NY 10504-1785 U.S.A. +The following paragraph does not apply to the United Kingdom or any other country where such provisions are inconsistent with local law: INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some states do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you. +This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice. 
+Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk. +IBM may use or distribute any of the information you supply in any way it believes appropriate without incurring any obligation to you. +Any performance data contained herein was determined in a controlled environment. Therefore, the results obtained in other operating environments may vary significantly. Some measurements may have been made on development-level systems and there is no guarantee that these measurements will be the same on generally available systems. Furthermore, some measurements may have been estimated through extrapolation. Actual results may vary. Users of this document should verify the applicable data for their specific environment. +Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products. +This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to the names and addresses used by an actual business enterprise is entirely coincidental. +COPYRIGHT LICENSE: +This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. 
You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs. +Trademarks +IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation in the United States, other countries, or both. These and other IBM trademarked terms are marked on their first occurrence in this information with the appropriate symbol (® or ™), indicating US registered or common law trademarks owned by IBM at the time this information was published. Such trademarks may also be registered or common law trademarks in other countries. A current list of IBM trademarks is available on the Web at http://www.ibm.com/legal/copytrade.shtml +The following terms are trademarks of the International Business Machines Corporation in the United States, other countries, or both: + + +AS/400®, IBM®, Redpaper™ +DB2®, Power Systems™, Redbooks (logo)® +DRDA®, Redbooks®, System i® +
+The following terms are trademarks of other companies: +Windows, and the Windows logo are trademarks of Microsoft Corporation in the United States, other countries, or both. +Other company, product, or service names may be trademarks or service marks of others. +DB2 for i Center of Excellence +Solution Brief IBM Systems Lab Services and Training +Highlights +GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH +GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH +GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH +GLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH +
+ +
+Power Services +DB2 for i Center of Excellence +Expert help to achieve your business requirements +We build confident, satisfied clients +No one else has the vast consulting experiences, skills sharing and renowned service offerings to do what we can do for you. +Because no one else is IBM. +With combined experiences and direct access to development groups, we're the experts in IBM DB2® for i. The DB2 for i Center of Excellence (CoE) can help you achieve—perhaps reexamine and exceed—your business requirements and gain more confidence and satisfaction in IBM product data management products and solutions. +Who we are, some of what we do +Global CoE engagements cover topics including: +- Database performance and scalability +- Advanced SQL knowledge and skills transfer +- Business intelligence and analytics +- DB2 Web Query +- Query/400 modernization for better reporting and analysis capabilities +- Database modernization and re-engineering +- Data-centric architecture and design +- Extremely large database and overcoming limits to growth +- ISV education and enablement +What you can expect +Depending on the engagement, our team of consultants offers: +- Briefings, consulting and guidance on demand +- Illumination of the DB2 for i capabilities and leadership to exploit them +- Analysis and remediation of performance and scalability issues caused by inefficient database design and implementation +- Configuration of systems, operating system and products to fully leverage database capabilities +Key client benefits +- Gain greater database and application performance within your current environment. Achieve greater productivity in the development and maintenance of database and applications using modern techniques. Architect and design data structures to accommodate and benefit from business analytics (BA) tools and processes. +For more information +Pricing depends on the scope of work. 
Learn more about the DB2 for i Center of Excellence and other related products and services. Contact stgls@us.ibm.com or visit: +ibm.com GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH +
+ +
+© Copyright IBM Corporation 2013 +IBM Corporation +Route 100 +Somers, NY 10589 +Produced in the United States of America March 2013 +IBM, the IBM logo, ibm.com, DB2 and Power Systems are trademarks of International Business Machines Corp., registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the web at "Copyright and trademark information" at www.ibm.com/legal/copytrade.shtml. +This document is current as of the initial date of publication and may be changed by IBM at any time. +Not all offerings are available in every country in which IBM operates. +
+ +
+Please Recycle +Preface +This IBM® Redpaper™ publication provides information about the IBM i 7.2 feature of IBM DB2® for i Row and Column Access Control (RCAC). It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment. +This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed. +This paper was produced by the IBM DB2 for i Center of Excellence team in partnership with the International Technical Support Organization (ITSO), Rochester, Minnesota US. +
+ +
+
+ +
+Jim Bainbridge is a senior DB2 consultant on the DB2 for i Center of Excellence team in the IBM Lab Services and Training organization. His primary role is training and implementation services for IBM DB2 Web Query for i and business analytics. Jim began his career with IBM 30 years ago in the IBM Rochester Development Lab, where he developed cooperative processing products that paired IBM PCs with IBM S/36 and AS/400 systems. In the years since, Jim has held numerous technical roles, including independent software vendors technical support on a broad range of IBM technologies and products, and supporting customers in the IBM Executive Briefing Center and IBM Project Office. +Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Rochester, Minnesota. He writes extensively and teaches IBM classes worldwide in all areas of DB2 for i. Before joining STG Lab Services, he worked in the ITSO for nine years writing multiple IBM Redbooks® publications. He also worked for IBM Colombia as an IBM AS/400® IT Specialist doing presales support for the Andean countries. He has 28 years of experience in the computing field and has taught database classes in Colombian universities. He holds a Master's degree in Computer Science from EAFIT, Colombia. His areas of expertise are database technology, performance, and data warehousing. Hernando can be contacted at hbedoya@us.ibm.com . +Authors +
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+Rob Bestgen is a member of the DB2 for i Center of Excellence team helping customers use the capabilities of DB2 for i. In addition, Rob is the chief architect of the DB2 SQL Query Engine (SQE) for DB2 for i and is the product development manager for DB2 Web Query for i. +Mike Cain is a Senior Technical Staff Member within the IBM Systems and Technology Group. He is also the founder and team leader of the DB2 for i Center of Excellence in Rochester, Minnesota US. Before his current position, he worked as an IBM AS/400 Systems Engineer and technical consultant. Before joining IBM in 1988, Mike worked as a System/38 programmer and data processing manager for a property and casualty insurance company. Mike has 26 years of experience with IBM, engaging clients and Business Partners around the world. In addition to assisting clients, he uses his knowledge and experience to influence the IBM solution, development, and support processes. +Dan Cruikshank has been an IT Professional since 1972. He has consulted on a number of different project areas since joining IBM Rochester in 1988. Since 1993, Dan was focused primarily on resolving IBM System i® application and database performance issues at several IBM customer accounts. Since 1998, Dan has been one of the primary instructors for the Database Optimization Workshop. Most recently, Dan is a member of the DB2 for i Center of Excellence team with IBM Rochester Lab Services. +Jim Denton is a senior consultant at the IBM DB2 for i Center of Excellence, where his responsibilities include both teaching courses and hands on consulting. Jim specializes in SQL performance, data-centric programming, and database modernization. Jim started his IBM career in 1981 as an S/38 operating system programmer. 
Before joining the consulting team, his key assignments included 10 years as a systems performance specialist, five years as the lead "JDE on i" analyst, three years as a consultant at the IBM Benchmark and Briefing Center in Montpellier France, and a total of 11 years as an operating system developer, including five years designing and implementing enhancements to DB2 for i. +Doug Mack is a DB2 for i and Business Intelligence Consultant in the IBM Power Systems™ Lab Services organization. Doug's 30+ year career with IBM spans many roles, including product development, technical sales support, Business Intelligence Sales Specialist, and DB2 for i Product Marketing Manager. Doug is a featured speaker at User Group conferences and meetings, IBM Technical Conferences, and Executive Briefings. +
+ +
+
+ +
+Tom McKinley is an IBM Lab Services Consultant working on DB2 for IBM i in Rochester MN. His main focus is complex query performance that is associated with Business Intelligence running on Very Large Databases. He worked as a developer or performance analyst in the DB area from 1986 until 2006. Some of his major pieces of work include the Symmetric Multiple processing capabilities of DB2 for IBM i and Large Object Data types. In addition, he was on the original team that designed and built the SQL Query Engine. Before his database work, he worked on Licensed Internal Code for System 34 and System 36. +Kent Milligan is a senior DB2 consultant on the DB2 for i Center of Excellence team within the IBM Lab Services and Training organization. His primary responsibility is helping software developers use the latest DB2 technologies and port applications from other databases to DB2 for i. After graduating from the University of Iowa, Kent spent the first eight years of his IBM career as a member of the DB2 development team in Rochester. +Thanks to the following people for their contributions to this project: +Debra Landon +International Technical Support Organization, Rochester Center +Craig Aldrich, Mark Anderson, Theresa Euler, Scott Forstie, Chad Olstad IBM Rochester Development +Now you can become a published author, too! +Here's an opportunity to spotlight your skills, grow your career, and become a published author-all at the same time! Join an ITSO residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base. 
+Find out more about the residency program, browse the residency index, and apply online at: ibm.com/redbooks/residencies.html +Comments welcome +Your comments are important to us! +We want our papers to be as helpful as possible. Send us your comments about this paper or other IBM Redbooks publications in one of the following ways: +- Use the online Contact us review Redbooks form found at: +ibm.com/redbooks +- Send your comments in an email to: +redbooks@us.ibm.com +- Mail your comments to: IBM Corporation, International Technical Support Organization, Dept. HYTD, Mail Station P099, 2455 South Road, Poughkeepsie, NY 12601-5400 +Stay connected to IBM Redbooks +- Find us on Facebook: +http://www.facebook.com/IBMRedbooks +- Follow us on Twitter: +http://twitter.com/ibmredbooks +- Look for us on LinkedIn: +http://www.linkedin.com/groups?home=&gid=2130806 +- Explore new Redbooks publications, residencies, and workshops with the IBM Redbooks weekly newsletter: +https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm +- Stay current on recent Redbooks publications with RSS Feeds: +http://www.redbooks.ibm.com/rss.html +
+ +
+Chapter 1. +Securing and protecting IBM DB2 data +Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record. +Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement. +This chapter describes how you can secure and protect data in DB2 for i. The following topics are covered in this chapter: +GLYPH Security fundamentals +GLYPH Current state of IBM i security +GLYPH DB2 for i security controls +1.1 Security fundamentals +Before reviewing database security techniques, there are two fundamental steps in securing information assets that must be described: +GLYPH First, and most important, is the definition of a company's security policy . Without a security policy, there is no definition of what are acceptable practices for using, accessing, and storing information by who, what, when, where, and how. A security policy should minimally address three things: confidentiality, integrity, and availability. +The monitoring and assessment of adherence to the security policy determines whether your security strategy is working. Often, IBM security consultants are asked to perform security assessments for companies without regard to the security policy. 
Although these assessments can be useful for observing how the system is defined and how data is being accessed, they cannot determine the level of security without a security policy. Without a security policy, it really is not an assessment as much as it is a baseline for monitoring the changes in the security settings that are captured. +A security policy is what defines whether the system and its settings are secure (or not). +GLYPH The second fundamental in securing data assets is the use of resource security . If implemented properly, resource security prevents data breaches from both internal and external intrusions. Resource security controls are closely tied to the part of the security policy that defines who should have access to what information resources. A hacker might be good enough to get through your company firewalls and sift his way through to your system, but if they do not have explicit access to your database, the hacker cannot compromise your information assets. +With your eyes now open to the importance of securing information assets, the rest of this chapter reviews the methods that are available for securing database resources on IBM i. +1.2 Current state of IBM i security +Because of the inherently secure nature of IBM i, many clients rely on the default system settings to protect their business data that is stored in DB2 for i. In most cases, this means no data protection because the default setting for the Create default public authority (QCRTAUT) system value is *CHANGE. +Even more disturbing is that many IBM i clients remain in this state, despite the news headlines and the significant costs that are involved with databases being compromised. This default security configuration makes it quite challenging to implement basic security policies. A tighter implementation is required if you really want to protect one of your company's most valuable assets, which is the data. 
+Traditionally, IBM i applications have employed menu-based security to counteract this default configuration that gives all users access to the data. The theory is that data is protected by the menu options controlling what database operations that the user can perform. This approach is ineffective, even if the user profile is restricted from running interactive commands. The reason is that in today's connected world there are a multitude of interfaces into the system, from web browsers to PC clients, that bypass application menus. If there are no object-level controls, users of these newer interfaces have an open door to your data. +Some clients using this default configuration have toughened their database security with exit-point solutions from third-party vendors. IBM i exit points allow a user-written program to be called every time that a particular interface (for example, FTP) is used or an event occurs (for example, a profile is created). Security tools that are based on these exit points increase the level of security on a system by locking down interfaces that are not under the control of menu-based or application authority. In addition, exit-point solutions allow clients to implement more granular security controls, such as allowing users access only to the database during certain hours of the day. +Although exit-point solutions can provide great benefits, they are not an alternative to object-level control of your databases. Exit-point solutions help secure interfaces, but they do not completely protect the data that is stored in your DB2 objects. Exit points do not exist for every data access interface on the system. Therefore, if an application starts using an unprotected interface, the only thing protecting your data is object-level access control. 
When your security implementation totally relies on exit points, then it is also important to track any new data interfaces that appear as IBM delivers new releases and products to ensure that your exit-point solution provides coverage for those new interfaces. +An exit-point solution is a good option for databases with security holes that are caused by a reliance on the default security setup or menu-based control. However, your security work should not stop there. Instead, you must continue to work on a complete database security solution by controlling data access at the object level. +1.3 DB2 for i security controls +As described in 1.2, "Current state of IBM i security" on page 2, object-level controls on your DB2 objects are a critical success factor in securing your business data. Although database object-level security is a strong security feature, some clients have found that object-level security does not have the granularity that is required to adhere to regulatory or compliance policies. A user that is granted object-level access to a DB2 table has the authority to view all of the rows and values in that table. +As shown in Figure 1-1, it is an all-or-nothing access to the rows of a table. +Figure 1-1 All-or-nothing access to the rows of a table +
+ +Figure 1-1 All-or-nothing access to the rows of a table +
+Many businesses are trying to limit data access to a need-to-know basis. This security goal means that users should be given access only to the minimum set of data that is required to perform their job. Often, users with object-level access are given access to row and column values that are beyond what their business task requires because object-level security provides an all-or-nothing solution. For example, object-level controls allow a manager to access data about all employees. Most security policies limit a manager to accessing data only for the employees that they manage. +1.3.1 Existing row and column control +Some IBM i clients have tried augmenting the all-or-nothing object-level security with SQL views (or logical files) and application logic, as shown in Figure 1-2. However, application-based logic is easy to bypass with all of the different data access interfaces that are provided by the IBM i operating system, such as Open Database Connectivity (ODBC) and System i Navigator. +Using SQL views to limit access to a subset of the data in a table also has its own set of challenges. First, there is the complexity of managing all of the SQL view objects that are used for securing data access. Second, scaling a view-based security solution can be difficult as the amount of data grows and the number of users increases. +Even if you are willing to live with these performance and management issues, a user with *ALLOBJ access still can directly access all of the data in the underlying DB2 table and easily bypass the security controls that are built into an SQL view. +Figure 1-2 Existing row and column controls +
+ +Figure 1-2 Existing row and column controls +
+1.3.2 New controls: Row and Column Access Control +Based on the challenges that are associated with the existing technology available for controlling row and column access at a more granular level, IBM delivered new security support in the IBM i 7.2 release; this support is known as Row and Column Access Control (RCAC). +The new DB2 RCAC support provides a method for controlling data access across all interfaces and all types of users with a data-centric solution. Moving security processing to the database layer makes it easier to build controls that meet your compliance policies. The RCAC support provides an additional layer of security that complements object-level authorizations to limit data access to a need-to-know basis. Therefore, it is critical that you first have a sound object-level security implementation in place. +
+ +
+Chapter 2. +Roles and separation of duties +One of the primary objectives of row and column access control (RCAC) is to create data security policies that control and govern user access to data and limit the data access of DB2 designers and administrators to only the minimum that is required to do their jobs. +To accomplish these tasks, RCAC engineers devised a set of functional roles that, as a group, implement effectively data access requirements and also limit the span of control of each role so that each role is given only the authorities that are needed to perform its specific set of tasks. +This chapter describes the concepts of roles and separation of duties on DB2 for i and covers the following topics: +GLYPH Roles +GLYPH Separation of duties +2.1 Roles +Traditionally, data access roles are defined in a binary way, where access to the data is either not permitted or access to the data is permitted. A full access capability can also be instantiated by the *ALLOBJ special authority, either explicitly or implicitly, for the security officer. If you hold the role of security officer, or have all *ALLOBJ special authority, you have access to all the data, with no exceptions. Unfortunately, this might not meet the organization's requirements for limiting access to data or separation of duties. +To assist with defining roles and the separation of duties with appropriate authority, IBM i provides function usage IDs . A function usage ID implements granular security controls rather than granting users powerful special authorities, such as all object, job control, or service. 
+Roles are divided among the following DB2 functions and their corresponding function usage IDs: +GLYPH DDM and IBM DRDA® application server access: QIBM_DB_DDMDRDA +GLYPH Toolbox application server access: QIBM_DB_ZDA +GLYPH Database Administrator function: QIBM_DB_SQLADM +GLYPH Database Information function: QIBM_DB_SYSMON +GLYPH Security Administrator function: QIBM_DB_SECADM +2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA +The QIBM_DB_DDMDRDA function usage ID restricts access to the DDM and DRDA application server (QRWTSRVR). This function usage ID provides an easy alternative (rather than writing an exit program) to control access to DDM and DRDA from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups. +This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable. +2.1.2 Toolbox application server access: QIBM_DB_ZDA +The QIBM_DB_ZDA function usage ID restricts access to the optimized server that handles DB2 requests from clients (QZDASOINIT and QZDASSINIT). Server access is used by the ODBC, OLE DB, and .NET providers that ship with IBM i Access for Windows and JDBC Toolbox, Run SQL scripts, and other parts of System i Navigator and Navigator for i Web console. +This function usage ID provides an easy alternative (rather than writing an exit program) to control access to these functions from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups. +This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable. +2.1.3 Database Administrator function: QIBM_DB_SQLADM +The Database Administrator function (QIBM_DB_SQLADM) is needed whenever a user is analyzing and viewing SQL performance data. 
Some of the more common database administrator functions include displaying statements from the SQL Plan Cache, analyzing SQL Performance Monitors and SQL Plan Cache Snapshots, and displaying the SQL details of a job other than your own. +The Database Administrator function provides an alternative to granting *JOBCTL, but simply having the Database Administrator authorization does not carry with it all the needed object authorities for every administration task. The default behavior is to deny authorization. +To perform database administrator tasks that are not related to performance analysis, you must refer to the details of the task to determine its specific authorization requirements. For example, to allow a database administrator to reorganize a table, the DBA must have additional object authorities to the table that are not covered by QIBM_DB_SQLADM. +Granting QIBM_DB_SQLADM function usage +Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Administration functions. +2.1.4 Database Information function: QIBM_DB_SYSMON +The Database Information function (QIBM_DB_SYSMON) provides much less authority than Database Administrator function. Its primary use allows a user to examine high-level database properties. +For example, a user that does not have *JOBCTL or QIBM_DB_SQLADM can still view the SQL Plan Cache properties if granted authority to QIBM_DB_SYSMON. Without granting this authority, the default behavior is to deny authorization. +Granting QIBM_DB_SYSMON function usage +Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Information functions. 
+2.1.5 Security Administrator function: QIBM_DB_SECADM +The Security Administrator function (QIBM_DB_SECADM) grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table. +Only those users with the QIBM_DB_SECADM function can administer and manage RCAC rules. RCAC can be used to prevent even users with *ALLOBJ authority from freely accessing all the data in a protected database. These users are excluded from data access unless they are specifically authorized by RCAC. Without granting this authority, the default behavior is to deny authorization. +Granting QIBM_DB_SECADM function usage +Only QSECOFR or a user with *SECADM special authority can grant the QIBM_DB_SECADM function usage to a user or group. +2.1.6 Change Function Usage CL command +The following CL commands can be used to work with, display, or change function usage IDs: +GLYPH Work Function Usage ( WRKFCNUSG ) +GLYPH Change Function Usage ( CHGFCNUSG ) +GLYPH Display Function Usage ( DSPFCNUSG ) +For example, the following CHGFCNUSG command shows granting authorization to user HBEDOYA to administer and manage RCAC rules: +CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED) +2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view +The FUNCTION_USAGE view contains function usage configuration details. Table 2-1 describes the columns in the FUNCTION_USAGE view. +Table 2-1 FUNCTION_USAGE view + + + +Column nameData typeDescription +FUNCTION_IDVARCHAR(30)ID of the function. +USER_NAMEVARCHAR(10)Name of the user profile that has a usage setting for this function. +USAGEVARCHAR(7)Usage setting: GLYPH ALLOWED: The user profile is allowed to use the function. GLYPH DENIED: The user profile is not allowed to use the function. +USER_TYPEVARCHAR(5)Type of user profile: GLYPH USER: The user profile is a user. 
GLYPH GROUP: The user profile is a group. +
Table 2-1 FUNCTION_USAGE view
+To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1. +Example 2-1 Query to determine who has authority to define and manage RCAC + + + +SELECT function_id, user_name, +usage, +user_type +FROM function_usage +WHERE function_id='QIBM_DB_SECADM' +ORDER BY user_name; +
Example 2-1 Query to determine who has authority to define and manage RCAC
+2.2 Separation of duties +Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority. +For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security. +In IBM i 7.2, the QIBM_DB_SECADM function usage grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table. +QIBM_DB_SECADM function usage can be granted only by a user with *SECADM special authority and can be given to a user or a group. +QIBM_DB_SECADM also is responsible for administering RCAC, which restricts which rows a user is allowed to access in a table and whether a user is allowed to see information in certain columns of a table. +A preferred practice is that the RCAC administrator has the QIBM_DB_SECADM function usage ID, but absolutely no other data privileges. The result is that the RCAC administrator can deploy and maintain the RCAC constructs, but cannot grant themselves unauthorized access to data itself. 
+Table 2-2 shows a comparison of the different function usage IDs and *JOBCTL authority to the different CL commands and DB2 for i tools. +Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority + + + +User action*JOBCTLQIBM_DB_SECADMQIBM_DB_SQLADMQIBM_DB_SYSMON No Authority +SET CURRENT DEGREE (SQL statement)XX +CHGQRYA command targeting a different user's jobXX +STRDBMON or ENDDBMON commands targeting a different user's jobXX +STRDBMON or ENDDBMON commands targeting a job that matches the current userXX XX +QUSRJOBI() API format 900 or System i Navigator's SQL Details for JobXXX +Visual Explain within Run SQL scriptsXXX X +Visual Explain outside of Run SQL scriptsXX +ANALYZE PLAN CACHE procedureXX +DUMP PLAN CACHE procedureXX +MODIFY PLAN CACHE procedureXX +MODIFY PLAN CACHE PROPERTIES procedure (currently does not check authority) +XX +CHANGE PLAN CACHE SIZE procedure (currently does not check authority)XX +
Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority
+ + +User action*JOBCTLQIBM_DB_SECADMQIBM_DB_SQLADMQIBM_DB_SYSMONNo Authority +START PLAN CACHE EVENT MONITOR procedureXX +END PLAN CACHE EVENT MONITOR procedureXX +END ALL PLAN CACHE EVENT MONITORS procedureXX +Work with RCAC row permissions (Create, modify, or delete)X +Work with RCAC column masks (Create, modify, or delete)X +Change Object Owner ( CHGOBJOWN ) CL commandX +Change Object Primary Group ( CHGOBJPGP ) CL commandX +Grant Object Authority ( GRTOBJAUT ) CL commandX +Revoke Object Authority ( RVKOBJAUT ) CL commandX +Edit Object Authority ( EDTOBJAUT ) CL commandX +Display Object Authority ( DSPOBJAUT ) CL commandX +Work with Objects ( WRKOBJ ) CL commandX +Work with Libraries ( WRKLIB ) CL commandX +Add Authorization List Entry ( ADDAUTLE ) CL commandX +Change Authorization List Entry ( CHGAUTLE ) CL commandX +Remove Authorization List Entry ( RMVAUTLE ) CL commandX +Retrieve Authorization List Entry ( RTVAUTLE ) CL commandX +Display Authorization List ( DSPAUTL ) CL commandX +Display Authorization List Objects ( DSPAUTLOBJ ) CL commandX +Edit Authorization List ( EDTAUTL ) CL commandX +Work with Authorization Lists ( WRKAUTL ) CL commandX +
+
+ +
+Chapter 3. +3 +Row and Column Access Control +This chapter describes what Row and Column Access Control (RCAC) is, its components, and then illustrates RCAC with a simple example. +The following topics are covered in this chapter: +GLYPH Explanation of RCAC and the concept of access control +GLYPH Special registers and built-in global variables +GLYPH VERIFY_GROUP_FOR_USER function +GLYPH Establishing and controlling accessibility by using the RCAC rule text +GLYPH SELECT, INSERT, and UPDATE behavior with RCAC +GLYPH Human resources example +3.1 Explanation of RCAC and the concept of access control +RCAC limits data access to those users who have a business "need to know". RCAC makes it easy to set up a rich and robust security policy that is based on roles and responsibilities. RCAC functionality is made available through the optional, no charge feature called "IBM Advanced Data Security for i", also known as option 47 of IBM i 7.2. +In DB2 for i, RCAC is implemented using two different approaches that address the shortcomings of traditional control methods and mechanisms: +GLYPH Row permissions +GLYPH Column masks +Another benefit of RCAC is that no database user is automatically exempt from the control. Users with *ALLOBJ authority can no longer freely access all of the data in the database unless they have the appropriate permission to do so. The ability to manage row permissions and column masks rests with the database security administrator. The RCAC definitions, enablement, and activation are controlled by SQL statements. +Row permissions and column masks require virtually no application changes. RCAC is based on specific rules that are transparent to existing applications and SQL interfaces. Enforcement of your security policy does not depend on how applications or tools access the data. 
+RCAC also facilitates multi-tenancy, which means that several independent customers or business units can share a single database table without being aware of one another. The RCAC row permission ensures each user sees only the rows they are entitled to view because the enforcement is handled by DB2 and not the application logic. +Label-based access control (LBAC): RCAC and LBAC are not the same thing. LBAC is a security model that is primarily intended for government applications. LBAC requires that data and users be classified with a fixed set of rules that are implemented. RCAC is a general-purpose security model that is primarily intended for commercial customers. You can use RCAC to create your own security rules, which in turn allows for more flexibility. +3.1.1 Row permission and column mask definitions +The following sections define row permission and column masks. +Row permission +A row permission is a database object that manifests a row access control rule for a specific table. It is essentially a search condition that describes which rows you can access. For example, a manager can see only the rows that represent his or her employees. +The SQL CREATE PERMISSION statement that is shown in Figure 3-1 is used to define and initially enable or disable the row access rules. +Figure 3-1 CREATE PERMISSION SQL statement +
+ +Figure 3-1 CREATE PERMISSION SQL statement +
+Column mask +A column mask is a database object that manifests a column value access control rule for a specific column in a specific table. It uses a CASE expression that describes what you see when you access the column. For example, a teller can see only the last four digits of a tax identification number. +Column masks replace the need to create and use views to implement access control. The SQL CREATE MASK statement that is shown in Figure 3-2 is used to define and initially enable or disable the column value access rules. +Figure 3-2 CREATE MASK SQL statement +
+ +Figure 3-2 CREATE MASK SQL statement +
+3.1.2 Enabling and activating RCAC +You can enable, disable, or regenerate row permissions and column masks by using the SQL ALTER PERMISSION statement and the SQL ALTER MASK statement, as shown in Figure 3-3 on page 17. +Enabling and disabling effectively turns on or off the logic that is contained in the row permission or column mask. Regenerating causes the row permission or column mask to be regenerated. The row permission definition in the catalog is used and existing dependencies and authorizations, if any, are retained. The row permission definition is reevaluated as though the row permission were being created. Any user-defined functions (UDFs) that are referenced in the row permission must be resolved to the same secure UDFs as were resolved during the original row permission or column mask creation. The regenerate option can be used to ensure that the RCAC logic is intact and still valid before any user attempts to access the table. +Note: An exclusive lock is required on the table object to perform the alter operation. All open cursors must be closed. +Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements +
+ +Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements +
+You can activate and deactivate RCAC for new or existing tables by using the SQL ALTER TABLE statement (Figure 3-4). The ACTIVATE or DEACTIVATE clause must be the option that is specified in the statement. No other alterations are permitted at the same time. The activating and deactivating effectively turns on or off all RCAC processing for the table. Only enabled row permissions and column masks take effect when activating RCAC. +Note: An exclusive lock is required on the table object to perform the alter operation. All open cursors must be closed. +Figure 3-4 ALTER TABLE SQL statement +
+ +Figure 3-4 ALTER TABLE SQL statement +
+When row access control is activated on a table, a default permission is established for that table. The name of this permission is QIBM_DEFAULT_<table-name>_<schema-name>. This default permission contains a simple piece of logic (0=1) which is never true. The default permission effectively denies access to every user unless there is a permission defined that allows access explicitly. If row access control is activated on a table, and there is no permission that is defined, no one has permission to any rows. All queries against the table produce an empty set. +It is possible to define, create, and enable multiple permissions on a table. Logically, all of the permissions are ORed together to form a comprehensive test of the user's ability to access the data. A column can have only one mask that is defined over it. From an implementation standpoint, it does not matter if you create the column masks first or the row permissions first. +Note: If a user does not have permission to access the row, the column mask logic is not invoked. +3.2 Special registers and built-in global variables +This section describes how you can use special registers and built-in global variables to implement RCAC. +3.2.1 Special registers +A special register is a storage area that is defined for an application process by DB2 and is used to store information that can be referenced in SQL statements. A reference to a special register is a reference to a value that is provided by the current server. +IBM DB2 for i supports four different special registers that can be used to identify what user profiles are relevant to determining object authorities in the current connection to the server. SQL uses the term runtime authorization ID , which corresponds to a user profile on DB2 for i. Here are the four special registers: +GLYPH USER is the runtime user profile that determines the object authorities for the current connection to the server. It has a data type of VARCHAR(18). 
This value can be changed by the SQL statement SET SESSION AUTHORIZATION . +GLYPH SESSION_USER is the same as the USER register, except that it has a data type of VARCHAR(128). +GLYPH CURRENT USER was added in IBM i 7.2 and is similar to the USER register, but it has one important difference in that it also reports adopted authority. High-level language programs and SQL routines such as functions, procedures, and triggers can optionally be created to run using either the caller's or the owner's user profile to determine data authorities. For example, an SQL procedure can be created to run under the owner's authority by specifying SET OPTION USRPRF=*OWNER . This special register can also be referenced as CURRENT_USER. It has a data type of VARCHAR(128). +GLYPH SYSTEM_USER is the user profile that initiates the connection to the server. It is not used by RCAC, but is included here for completeness. Many jobs, including the QZDASOINIT prestarted jobs, initially connect to the server with a default user profile and then change to use some other user profile. SYSTEM_USER reports this value, typically QUSER for a QZDASOINIT job. It has a data type of VARCHAR(128). +In addition to these four special registers, any of the DB2 special registers can be referenced as part of the rule text. +Table 3-1 summarizes these special registers and their values. +Table 3-1 Special registers and their corresponding values + + + +Special registerCorresponding value +USER or SESSION_USERThe effective user of the thread excluding adopted authority. +CURRENT_USERThe effective user of the thread including adopted authority. When no adopted authority is present, this has the same value as USER. +SYSTEM_USERThe authorization ID that initiated the connection. +
Table 3-1 Special registers and their corresponding values
+Figure 3-5 shows the difference in the special register values when an adopted authority is used: +GLYPH A user connects to the server using the user profile ALICE. +GLYPH USER and CURRENT USER initially have the same value of ALICE. +GLYPH ALICE calls an SQL procedure that is named proc1, which is owned by user profile JOE and was created to adopt JOE's authority when it is called. +GLYPH While the procedure is running, the special register USER still contains the value of ALICE because it excludes any adopted authority. The special register CURRENT USER contains the value of JOE because it includes any adopted authority. +GLYPH When proc1 ends, the session reverts to its original state with both USER and CURRENT USER having the value of ALICE. +Figure 3-5 Special registers and adopted authority +
+ +Figure 3-5 Special registers and adopted authority +
+3.2.2 Built-in global variables +Built-in global variables are provided with the database manager and are used in SQL statements to retrieve scalar values that are associated with the variables. +IBM DB2 for i supports nine different built-in global variables that are read only and maintained by the system. These global variables can be used to identify attributes of the database connection and used as part of the RCAC logic. +Table 3-2 lists the nine built-in global variables. +Table 3-2 Built-in global variables + + + +| Global variable | Type | Description | +|---|---|---| +| CLIENT_HOST | VARCHAR(255) | Host name of the current client as returned by the system | +| CLIENT_IPADDR | VARCHAR(128) | IP address of the current client as returned by the system | +| CLIENT_PORT | INTEGER | Port used by the current client to communicate with the server | +| PACKAGE_NAME | VARCHAR(128) | Name of the currently running package | +| PACKAGE_SCHEMA | VARCHAR(128) | Schema name of the currently running package | +| PACKAGE_VERSION | VARCHAR(64) | Version identifier of the currently running package | +| ROUTINE_SCHEMA | VARCHAR(128) | Schema name of the currently running routine | +| ROUTINE_SPECIFIC_NAME | VARCHAR(128) | Name of the currently running routine | +| ROUTINE_TYPE | CHAR(1) | Type of the currently running routine | +
Table 3-2 Built-in global variables
+3.3 VERIFY_GROUP_FOR_USER function +The VERIFY_GROUP_FOR_USER function was added in IBM i 7.2. Although it is primarily intended for use with RCAC permissions and masks, it can be used in other SQL statements. The first parameter must be one of these three special registers: SESSION_USER, USER, or CURRENT_USER. The second and subsequent parameters are a list of user or group profiles. Each of these values must be 1 - 10 characters in length. These values are not validated for their existence, which means that you can specify the names of user profiles that do not exist without receiving any kind of error. +If a special register value is in the list of user profiles or it is a member of a group profile included in the list, the function returns a long integer value of 1. Otherwise, it returns a value of 0. It never returns the null value. +Here is an example of using the VERIFY_GROUP_FOR_USER function: +1. There are user profiles for MGR, JANE, JUDY, and TONY. +2. The user profile JANE specifies a group profile of MGR. +3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1: +VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') +3.4 Establishing and controlling accessibility by using the RCAC rule text +When defining a row permission or column mask, the "magic" of establishing and controlling accessibility comes from the rule text . The rule text represents the search criteria and logic that is implemented by the database engine. +In the case of a row permission, the rule text is the "test" of whether the user can access the row. If the test result is true, the row can be accessed. If the test result is false, the row essentially does not exist for the user. 
From a set-at-a-time perspective, the permission defines which rows can be part of the query result set, and which rows cannot. +In the case of a column mask, the rule text is both the test of whether the user can see the actual column value, and it is the masking logic if the user cannot have access to the actual column value. +For a simple example of implementing row permissions and column masks, see 3.6, "Human resources example" on page 22. +In general, almost any set-based, relational logic is valid. For the row permission, the search condition follows the same rules that are used by the search condition in a WHERE clause. +For the column mask, the logic follows the same rules as the CASE expression. The result data type, length, null attribute, and CCSID of the CASE expression must be compatible with the data type of the column. If the column does not allow the null value, the result of the CASE expression cannot be the NULL value. The application or interface making the data access request is expecting that all of the column attributes and values are consistent with the original definition, regardless of any masking. +For more information about what is permitted, see the "Database programming" topic of the IBM i 7.2 Knowledge Center, found at: +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?lang=en +One of the first tasks in either the row permission or the column mask logic is to determine who the user is, and whether they have access to the data. Elegant methods to establish the identity and attributes of the user can be employed by using the special registers, global variables, and the VERIFY_GROUP_FOR_USER function. After the user's identity is established, it is a simple matter of allowing or disallowing access by using true or false testing. The examples that are included in this paper demonstrate some of the more common and obvious techniques. 
+More sophisticated methods can employ existential, day of year / time of day, and relational comparisons with set operations. For example, you can use a date master or date dimension table to determine whether the current date is a normal business day. If the current date is a valid business day, then access is allowed. If the current date is not a business day (for example a weekend day or holiday), access is denied. This test can be accomplished by performing a lookup using a subquery, such as the one that is shown in Example 3-1. +Example 3-1 Subquery that is used as part of the rule + + + +CURRENT_DATE IN (SELECT D.DATE_KEY +FROM DATE_MASTER D +WHERE D.BUSINESS_DAY = 'Y') +
Example 3-1 Subquery that is used as part of the rule
+Given that joins and subqueries can be used to perform set-based operations against existing data that is housed in other objects, almost any relational test can be constructed. If the data in the objects is manipulated over time, the RCAC test logic (and user query results) can be changed without modifying the actual row permission or column mask. This includes moving a user from one group to another or changing a column value that is used to allow or disallow access. For example, if Saturday is now a valid business day, only the BUSINESS_DAY value in the DATE_MASTER must be updated, not the permission logic. This technique can potentially avoid downtime because of the exclusive lock that is needed on the table when adding or changing RCAC definitions. +3.5 SELECT, INSERT, and UPDATE behavior with RCAC +RCAC provides a database-centric approach to determining which rows can be accessed and what column values can be seen by a specific user. Given that the control is handled by DB2 internally, every data manipulation statement is under the influence of RCAC, with no exceptions. When accessing the table, the SELECT statements, searched UPDATE statements, and searched DELETE statements implicitly and transparently contain the row permission and the column mask rule text. This means that the data set can be logically restricted and reduced on a user by user basis. +Furthermore, DB2 prevents an INSERT statement from inserting a row or an UPDATE statement from modifying a row such that the current user cannot be permitted to access it. You cannot create a situation in which the data you inserted or changed is no longer accessible to you. +For more information and considerations about data movement in an RCAC environment, see Chapter 6, "Additional considerations" on page 85. +Note: DB2 does not provide any indication back to the user that the data set requested was restricted or reduced by RCAC. 
This is by design, as it helps minimize any changes to the applications accessing the data. +3.6 Human resources example +This section illustrates with a simple example the usage of RCAC on a typical Human Resources application (schema). In this sample Human Resources schema, there is an important table that is called EMPLOYEES that contains all the information that is related to the employees of the company. Among the information that normally is stored in the EMPLOYEES table, there is some sensitive information that must be hidden from certain users: +GLYPH Tax_Id information +GLYPH YEAR of the birth date of the employee (hiding the age of the employee) +In this example, there are four different types of users: +GLYPH Employees +GLYPH Managers +GLYPH Human Resources Manager +GLYPH Consultant/IT Database Engineer (In this example, this person is an external consultant that is not an employee of the company.) +The following sections describe step-by-step what is needed to be done to implement RCAC in this environment. +3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants +The consultant must have authority to implement RCAC, so you must use one of the function IDs that are provided in DB2 for i (see 2.1.5, "Security Administrator function: QIBM_DB_SECADM" on page 9). Complete the following steps: +1. Run the Change Functional Usage ( CHGFCNUSG ) CL commands that are shown in Example 3-2. These commands must be run by someone that has the *SECOFR authority. +Example 3-2 Function ID required to implement RCAC +CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED) CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(MCAIN) USAGE(*ALLOWED) +2. There is a way to discover which user profiles have authorization to implement RCAC. This can be done by running the SQL statement that is shown in Example 3-3. 
+Example 3-3 Verifying what user profiles have authorization to implement RCAC +SELECT function_id, user_name, usage, user_type FROM qsys2.function_usage WHERE function_id ='QIBM_DB_SECADM' ORDER BY user_name; +3. The result of the SQL statement is shown in Figure 3-6. In this example, either MCAIN or HBEDOYA can implement RCAC in the Human Resources database. +Figure 3-6 Result of the function ID query +3.6.2 Creating group profiles for the users and their roles +Assuming that all the employees have a valid user profile, the next step is to create group profiles to group the employees. Complete the following steps: +1. In this example, there are three group profiles: +-HR (Human Resource personnel) +-MGR (Managers) +-EMP (Employees) +These are created by creating user profiles with no password. Example 3-4 shows the Create User Profile ( CRTUSRPRF ) CL commands that you use to create these group profiles. +Example 3-4 Creating group profiles +CRTUSRPRF USRPRF(EMP) PASSWORD() TEXT('Employees Group') CRTUSRPRF USRPRF(MGR) PASSWORD() TEXT('Managers Group') CRTUSRPRF USRPRF(HR) PASSWORD() TEXT('Human Resources Group') +2. You now must assign users to a group profile. Employees go in to the EMP group profile, Managers go into the MGR group profile, and Human Resource employees go into the HR group profile. For simplicity, this example selects one employee (DSSMITH), one manager (TQSPENSER), and one HR analyst (VGLUCCHESS). +Note: Neither of the consultants (MCAIN and HBEDOYA) belong to any group profile. +3.6.3 Demonstrating data access without RCAC +Before implementing RCAC, run some simple SQL statements to demonstrate data access without RCAC. Complete the following steps: +1. The first SQL statement, which is shown in Example 3-5, basically counts the total number of rows in the EMPLOYEES table. 
+Example 3-5 Counting the number of employees +SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES; +The result of this query is shown in Figure 3-7, which is the total number of employees of the company. +Figure 3-7 Number of employees +
+ +Figure 3-7 Number of employees +
+2. Run a second SQL statement (shown in Example 3-6) that lists the employees. If you have read access to the table, you see all the rows no matter who you are. +Example 3-6 Displaying the information of the Employees +SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES +The result of this query is shown in Figure 3-8. +Figure 3-8 List of employees without RCAC enabled +3.6.4 Defining and creating row permissions +Implement RCAC on the EMPLOYEES table by completing the following steps: +1. Start by defining a row permission. In this example, the rules to enforce include the following ones: +-Human Resources employees can see all the rows. +-Managers can see only information for the employees that they manage. +-Employees can see only their own information. +-Consultants are not allowed to see any rows in the table. +To implement this row permission, run the SQL statement that is shown in Example 3-7. +Example 3-7 Creating a permission for the EMPLOYEE table +CREATE PERMISSION HR_SCHEMA.PERMISSION1_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR ROWS WHERE ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND ( EMPLOYEES . MANAGER_OF_EMPLOYEE = SESSION_USER OR EMPLOYEES . USER_ID = SESSION_USER ) ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 AND EMPLOYEES . USER_ID = SESSION_USER ) ENFORCED FOR ALL ACCESS ENABLE ; +2. Look at the definition of the table and see the permissions, as shown in Figure 3-9. QIBM_DEFAULT_EMPLOYEE_HR_SCHEMA is the default permission, as described in 3.1.2, "Enabling and activating RCAC" on page 16. +Figure 3-9 Row permissions that are shown in System i Navigator +
+ +Figure 3-9 Row permissions that are shown in System i Navigator +
+3.6.5 Defining and creating column masks +Define the different masks for the columns that are sensitive by completing the following steps: +1. Start with the DATE_OF_BIRTH column. In this example, the rules to enforce include the following ones: +-Human Resources can see the entire date of birth of the employees. +-Employees can see only their own date of birth. +-Managers can see the date of birth of their employees masked with YEAR being 9999. +To implement this column mask, run the SQL statement that is shown in Example 3-8. +Example 3-8 Creation of a mask on the DATE_OF_BIRTH column + + + +CREATE MASK HR_SCHEMA.MASK_DATE_OF_BIRTH_ON_EMPLOYEES +ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES +FOR COLUMN DATE_OF_BIRTH +
Example 3-8 Creation of a mask on the DATE_OF_BIRTH column
+RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ; +2. The other column to mask in this example is the TAX_ID information. In this example, the rules to enforce include the following ones: +-Human Resources can see the unmasked TAX_ID of the employees. +-Employees can see only their own unmasked TAX_ID. +-Managers see a masked version of TAX_ID with the first five characters replaced with the X character (for example, XXX-XX-1234). +-Any other person sees the entire TAX_ID as masked, for example, XXX-XX-XXXX. +To implement this column mask, run the SQL statement that is shown in Example 3-9. +Example 3-9 Creating a mask on the TAX_ID column +CREATE MASK HR_SCHEMA.MASK_TAX_ID_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR COLUMN TAX_ID RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( EMPLOYEES . TAX_ID , 8 , 4 ) ) WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 THEN EMPLOYEES . TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ; +3. Figure 3-10 shows the masks that are created in the HR_SCHEMA. +Figure 3-10 Column masks shown in System i Navigator +
+ +Figure 3-10 Column masks shown in System i Navigator +
+3.6.6 Activating RCAC +Now that you have created the row permission and the two column masks, RCAC must be activated. The row permission and the two column masks are enabled (last clause in the scripts), but now you must activate RCAC on the table. To do so, complete the following steps: +1. Run the SQL statements that are shown in Example 3-10. +Example 3-10 Activating RCAC on the EMPLOYEES table +/* Activate Row Access Control (permissions) */ /* Activate Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL; +2. Look at the definition of the EMPLOYEES table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas → HR_SCHEMA → Tables , right-click the EMPLOYEES table, and click Definition . +Figure 3-11 Selecting the EMPLOYEES table from System i Navigator +
+ +Figure 3-11 Selecting the EMPLOYEES table from System i Navigator +
+3. The EMPLOYEES table definition is displayed, as shown in Figure 3-12. Note that the Row access control and Column access control options are checked. +Figure 3-12 RCAC enabled on the EMPLOYEES table +
+ +Figure 3-12 RCAC enabled on the EMPLOYEES table +
+3.6.7 Demonstrating data access with RCAC +You are now ready to start testing RCAC with the four different users. Complete the following steps: +1. The first SQL statement that is shown in Example 3-11 illustrates the EMPLOYEE count. You know that there are 42 rows from the query that was run before RCAC was put in place (see 3.6.3, "Demonstrating data access without RCAC" on page 24). +Example 3-11 EMPLOYEES count +SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES; +2. The result of the query for a user that belongs to the HR group profile is shown in Figure 3-13. This user can see all the 42 rows (employees). +Figure 3-13 Count of EMPLOYEES by HR +
+ +Figure 3-13 Count of EMPLOYEES by HR +
+3. The result of the same query for a user who is logged on as TQSPENSER (Manager) is shown in Figure 3-14. TQSPENSER has five employees in his department and he can also see his own row, which is why the count is 6. +Figure 3-14 Count of EMPLOYEES by a manager +
+ +Figure 3-14 Count of EMPLOYEES by a manager +
+4. The result of the same query that is run by an employee (DSSMITH) gives the result that is shown in Figure 3-15. Each employee can see only his or her own data (row). +Figure 3-15 Count of EMPLOYEES by an employee +
+ +Figure 3-15 Count of EMPLOYEES by an employee +
+5. The result of the same query that is run by the Consultant/DBE gives the result that is shown in Figure 3-16. The consultants/DBE can manage and implement RCAC, but they do not see any rows at all. +Figure 3-16 Count of EMPLOYEES by a consultant +
+ +Figure 3-16 Count of EMPLOYEES by a consultant +
+Does the result make sense? Yes, it does because RCAC is enabled. +6. Run queries against the EMPLOYEES table. The query that is used in this example runs and tests with the four different user profiles and is the same query that was run in 3.6.3, "Demonstrating data access without RCAC" on page 24. It is shown in Example 3-12. +Example 3-12 SELECT statement to test with the different users +SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES +7. Figure 3-17 shows the results of the query for a Human Resources (VGLUCCHESS) user profile. The user can see all the rows and all the columns. +Figure 3-17 SQL statement result by Human Resources user profile +8. Figure 3-18 shows the results of the same query for the Manager (TQSPENSER). Notice the masking of the DATE_OF_BIRTH and TAX_ID columns. +Figure 3-18 SQL statement result by Manager profile +9. Figure 3-19 shows the results of the same query for an employee (DSSMITH). The employee can see only his own data with no masking at all. +Figure 3-19 SQL statement result by an employee profile +10. Figure 3-20 shows the results of the same query for the Consultant/DBE, who is not one of the company's employees. +Figure 3-20 SQL statement result by Consultant/DBE profile +3.6.8 Demonstrating data access with a view and RCAC +This section covers data access with a view and RCAC. Complete the following steps: +1. The EMPLOYEES table has a column that is called On_Leave_Flag (Figure 3-21 on page 33) indicating that the employee is on Leave of Absence. For this purpose, a view is created that lists only the employees that are on leave. +Figure 3-21 Employees on leave +2. Example 3-13 shows the definition of the view.
+Example 3-13 View of employees on leave +CREATE VIEW HR_SCHEMA.EMPLOYEES_ON_LEAVE (EMPLOYEE_ID, FIRST_NAME, MIDDLE_INITIAL, LAST_NAME, WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH, +TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG ) +AS SELECT EMPLOYEE_ID, FIRST_NAME , MIDDLE_INITIAL, LAST_NAME , WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG FROM HR_SCHEMA.EMPLOYEES WHERE ON_LEAVE_FLAG = 'Y'; +3. Use the view to query the data and see who is on leave. The SQL statement that is used is shown in Example 3-14: +Example 3-14 SQL statement for employees on leave +SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES_ON_LEAVE; +4. Start with the Human Resources person (VGLUCCHESS) and see what the result of the previous query is. He sees the two employees that are on leave and no masking is done over the DATE_OF_BIRTH and TAX_ID columns. The results of the query are shown in Figure 3-22. +Figure 3-22 Employees on leave - Human Resources user +5. Figure 3-23 shows what the Manager (TQSPENSER) gets when he runs the same query over the view. He sees only the employees that are on leave that are managed by him. In this example, it is one employee. The columns are masked, which confirms that RCAC is applied to the view as well. +Figure 3-23 Employee on leave - Manager of Field Reps user +6. Figure 3-24 shows what the employee (DSSMITH) gets when he runs the same query over the view. The employee gets an empty set or he gets only himself if he is on leave. +. +Figure 3-24 Employees on leave - employee user +
+ +
+Chapter 4. +4 +Implementing Row and Column Access Control: Banking example +This chapter illustrates the Row and Column Access Control (RCAC) concepts using a banking example. Appendix A, "Database definitions for the RCAC banking example" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example. +The following topics are covered in this chapter: +GLYPH Business requirements for the RCAC banking scenario +GLYPH Description of the users roles and responsibilities +GLYPH Implementation of RCAC +4.1 Business requirements for the RCAC banking scenario +As part of a new internet banking project, the Bank decides to raise the level of data access control on the following three tables that are involved in the new customer-facing application: +GLYPH CUSTOMERS +GLYPH ACCOUNTS +GLYPH TRANSACTIONS +RCAC will be used to restrict access to the rows in these three tables by using permissions, and to restrict column values by using masks. The default position is that no user can access the rows in the tables. From there, specific bank employees are allowed access only to the rows for their job responsibilities. In addition, columns containing personal or sensitive data are masked appropriately. Bank customers are allowed access to only their rows and column values. +In this example, it is assumed that the Bank employees have access to the tables when working on the premises only. Employee access to data is provided by programs and tools using standard DB2 interfaces, such as embedded SQL, ODBC, JDBC, and CLI. The database connection authentication for these interfaces uses the employee's personal and unique IBM i user profile. Operating in their professional role, employees do not have access to bank data through the Internet. +Bank customers have access to their accounts and transactions by using a new web application. Each customer has unique credentials for logging in to the application. 
The authentication of the customer is handled by the web server. After the customer is authenticated, the web server establishes a connection to DB2 for data access. This connection uses a common IBM i user profile that is known as WEBUSER. This user profile is secured and is used only by the web application. No Bank employee has access to the WEBUSER profile, and no customer has an IBM i user profile. +The customer's identity is passed to DB2 by using a global variable. The global variable is secured and can be accessed only by the WEBUSER. The web application sets the CUSTOMER_LOGIN_ID variable to the customer's login value. This value is compared to the customer's login value that is found in the CUSTOMER_LOGIN_ID column of the CUSTOMERS table. +Applications that do not use the web interface do not have to be changed because the global variable is NULL by default. +A diagram of the internet banking architecture is shown in Figure 4-1: +GLYPH The row permission and column masks for the CUSTOMERS table are based on the group of which the user profile is part. If the user is a customer, their specific login ID also is tested. +GLYPH The row permission and column mask for the ACCOUNTS table are based on the CUSTOMERS table permission rules. A subquery is used to connect the accounts (child) with the customer (parent). +GLYPH The row permission for the TRANSACTIONS table is based on the ACCOUNTS table permission rules and the CUSTOMERS table permission rules. A subquery is used to connect the transactions (child) with the account (parent) and the account (child) with the customer (parent). +Figure 4-1 Internet banking example +
+ +Figure 4-1 Internet banking example +
+4.2 Description of the users roles and responsibilities +During the requirements gathering phase, the following groups of users are identified and codified: +GLYPH SECURITY: Security officer and security administrators +GLYPH DBE: Database engineers +GLYPH ADMIN: Bank business administrators +GLYPH TELLER: Bank tellers +GLYPH CUSTOMER: Bank customers using the internet +GLYPH PUBLIC: Anyone not already in a group +Based on their respective roles and responsibilities, the users (that is, a group) are controlled by row permissions and column masks. The chart that is shown in Figure 4-2 shows the rules for row and column access in this example. +Figure 4-2 Rules for row and column access + + + +CUSTOMERSCUSTOMERSACCOUNTSACCOUNTSTRANSACTIONSTRANSACTIONS +SECURITYNo RowsYesNo RowsYesNo RowsNo +DBEAll RowsYesAll RowsYesAll RowsNo +ADMINAll RowsNoAll RowsNoAll RowsNo +TELLERAll RowsYesAll RowsNoAll RowsNo +CUSTOMEROwn RowsNoOwn RowsNoOwn RowsNo +PUBLICNo RowsYesNo RowsYesNo RowsNo +
Figure 4-2 Rules for row and column access
+The chart that is shown in Figure 4-3 shows the column access that is allowed by group and lists the column masks by table. +Figure 4-3 Column masks + + + +CUSTOMERSACCOUNTS +SECURITYNo RowsCUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_IDACCOUNT_NUMBER +DBEAll RowsCUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_IDACCOUNT NUMBER ACCOUNT_NUMBER +ADMINAll RowsNoneNone +TELLERAll RowsCUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER TAX ID _ _None +CUSTOMEROwn RowsNoneNone +PUBLICNo RowsCUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER LOGIN ID CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_IDACCOUNT_NUMBER +
Figure 4-3 Column masks
+For the demonstration and testing of RCAC in this example, the following users interact with the database. Furthermore, the column masking rules are developed independently of the row permissions. If a person does not have permission to access the row, the column mask processing does not occur. +GLYPH Hernando Bedoya is a DB2 for i database engineer with the user profile of HBEDOYA. He is part of the DBE group. +GLYPH Mike Cain is a DB2 for i database engineer with the user profile of MCAIN. He is part of the DBE group. +GLYPH Veronica G. Lucchess is a bank account administrator with the user profile of VGLUCCHESS. She is part of the ADMIN group. +GLYPH Tom Q. Spenser is a bank teller with the user profile of TQSPENSER. He is part of the TELLER group. +GLYPH The IT security officer has the user profile of SECURITY. She is not part of any group. +GLYPH The online banking web application uses the user profile WEBUSER. This profile is part of the CUSTOMER group. Any future customer-facing applications can also use this group if needed. +GLYPH Adam O. Olsen is a bank customer with a web application login ID of KLD72CQR8JG. +4.3 Implementation of RCAC +Figure 4-4 shows the data model of the banking scenario that is used in this example. +Figure 4-4 Data model of the banking scenario +
+ +Figure 4-4 Data model of the banking scenario +
+This section covers the following steps: +GLYPH Reviewing the tables that are used in this example +GLYPH Assigning function ID QIBM_DB_SECADM to the Database Engineers group +GLYPH Creating group profiles for the users and their roles +GLYPH Creating the CUSTOMER_LOGIN_ID global variable +GLYPH Defining and creating row permissions +GLYPH Defining and creating column masks +GLYPH Restricting the inserting and updating of masked data +GLYPH Activating row and column access control +GLYPH Reviewing row permissions +GLYPH Demonstrating data access with RCAC +GLYPH Query implementation with RCAC activated +4.3.1 Reviewing the tables that are used in this example +This section reviews the tables that are used in this example. As shown in Figure 4-5, there are three main tables that are involved in the data model: CUSTOMERS, ACCOUNTS, and TRANSACTIONS. There are 90 customers. +Figure 4-5 Tables that are used in the banking example +Note: Appendix A, "Database definitions for the RCAC banking example" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example. +To review the attributes of each table that is used in this banking example, complete the following steps: +1. Review the columns of each of the tables through System i Navigator. Expand Database → named Database → Schemas → BANK_SCHEMA → Tables . +2. Right-click the CUSTOMERS table and select Definition . Figure 4-6 shows the attributes for the CUSTOMERS table. The Row access control and Column access control options are not selected, which indicates that the table does not have RCAC implemented. +Figure 4-6 CUSTOMERS table attributes +
+ +Figure 4-6 CUSTOMERS table attributes +
+3. Click the Columns tab to see the columns of the CUSTOMERS table, as shown in Figure 4-7. +Figure 4-7 Column definitions of the CUSTOMERS table +4. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the CUSTOMERS table, as shown in Figure 4-8. There are no Foreign Key Constraints or Check Constraints on the CUSTOMERS table. +Figure 4-8 Reviewing the constraints on the CUSTOMERS table +
+ +Figure 4-8 Reviewing the constraints on the CUSTOMERS table +
+5. Review the definition of the ACCOUNTS table. The definition of the ACCOUNTS table is shown in Figure 4-9. RCAC has not been defined for this table yet. +Figure 4-9 ACCOUNTS table attributes +
+ +Figure 4-9 ACCOUNTS table attributes +
+6. Click the Columns tab to see the columns of the ACCOUNTS table, as shown in Figure 4-10. +Figure 4-10 Column definitions of the ACCOUNTS table +7. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the ACCOUNTS table, as shown in Figure 4-11. There is one Foreign Key Constraint and no Check Constraints on the ACCOUNTS table. +Figure 4-11 Reviewing the constraints on the ACCOUNTS table +8. Review the definition of the TRANSACTIONS table. The definition of the TRANSACTIONS table is shown in Figure 4-12. RCAC is not defined for this table yet. +Figure 4-12 TRANSACTIONS table attributes +
+ +Figure 4-12 TRANSACTIONS table attributes +
+9. Click the Columns tab to see the columns of the TRANSACTIONS table, as shown in Figure 4-13. +Figure 4-13 Column definitions of the TRANSACTIONS table +10.Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the TRANSACTIONS table, as shown in Figure 4-14. There is one Foreign Key Constraint and one Check Constraint on the TRANSACTIONS table. +Figure 4-14 Reviewing the constraints on the TRANSACTIONS table +Now that you have reviewed the database model for this example, the following sections describe the steps that are required to implement RCAC in this banking scenario. +4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group +The first step is to assign the appropriate function usage ID to the Database Engineers (DBEs) that will be implementing RCAC. For a description of function usage IDs, see 2.1, "Roles" on page 8. In this example, the DBEs are users MCAIN and HBEDOYA. +Complete the following steps: +1. Right-click the database connection and select Application Administration , as shown in Figure 4-15. +Figure 4-15 Application administration +
+ +Figure 4-15 Application administration +
+2. The Application Administration window opens, as shown in Figure 4-16. Click IBM i → Database and select the function usage ID of Database Security Administrator . +Figure 4-16 Application administration for IBM i +
+ +Figure 4-16 Application administration for IBM i +
+3. Click Customize for the function usage ID of Database Security Administrator, as shown in Figure 4-17. +Figure 4-17 Customizing the Database Security Administrator function usage ID +
+ +Figure 4-17 Customizing the Database Security Administrator function usage ID +
+4. The Customize Access window opens, as shown in Figure 4-18. Click the users that need to implement RCAC. For this example, HBEDOYA and MCAIN are selected. Click Add and then click OK . +Figure 4-18 Customize Access window +
+ +Figure 4-18 Customize Access window +
+5. The Application Administration window opens again. The function usage ID of Database Security Administrator now has an X in the Customized Access column, as shown in Figure 4-19. +Figure 4-19 Function usage ID Database Security Administrator customized +
+ +Figure 4-19 Function usage ID Database Security Administrator customized +
+6. Run an SQL query that shows which user profiles are enabled to define RCAC. The SQL query is shown in Figure 4-20. +Figure 4-20 Query to display user profiles with function usage ID for RCAC +4.3.3 Creating group profiles for the users and their roles +The next step is to create the different group profiles (ADMIN, CUSTOMER, TELLER, and DBE) and assign the different user profiles to the different group profiles. For a description of the different groups and users for this example, see 4.2, "Description of the users roles and responsibilities" on page 39. +Complete the following steps: +1. On the main navigation pane of System i Navigator, right-click Groups and select New Group , as shown in Figure 4-21. +Figure 4-21 Creating group profiles +
+ +Figure 4-21 Creating group profiles +
+2. The New Group window opens, as shown in Figure 4-22. For each new group, enter the Group name (ADMIN, CUSTOMER, TELLER, and DBE) and add the user profiles that are associated with this group by selecting the user profile and clicking Add . +Figure 4-22 shows adding user TQSPENSER to the TELLER group profile. +Figure 4-22 Creating group profiles and adding users +
+ +Figure 4-22 Creating group profiles and adding users +
+3. After you create all the group profiles, you should see them listed in System i Navigator under Users and Groups → Groups , as shown in Figure 4-23. +Figure 4-23 Newly created group profiles +
+ +Figure 4-23 Newly created group profiles +
+4.3.4 Creating the CUSTOMER_LOGIN_ID global variable +In this step, you create a global variable that is used to capture the Customer_Login_ID information, which is required to validate the permissions. For more information about global variables, see 3.2.2, "Built-in global variables" on page 19. +Complete the following steps: +1. From System i Navigator, under the schema Bank_Schema, right-click Global Variable and select New → Global Variable , as shown in Figure 4-24. +Figure 4-24 Creating a global variable +
+ +Figure 4-24 Creating a global variable +
+2. The New Global Variable window opens, as shown in Figure 4-25. Enter the global variable name of CUSTOMER_LOGIN_ID, select the data type of VARCHAR, and leave the default value of NULL. This default value ensures that users that do not use the web interface do not have permission to access the data. Click OK . +Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID +
+ +Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID +
+3. Now that the global variable is created, assign permissions to the variable so that it can be set by the program. Right-click the CUSTOMER_LOGIN_ID global variable and select Permissions , as shown in Figure 4-26. +Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable +
+ +Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable +
+4. The Permissions window opens, as shown in Figure 4-27. Select Change authority for Webuser so that the application can set this global variable. +Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable +
+ +Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable +
+4.3.5 Defining and creating row permissions +You are now ready to define the row permissions of the tables. Complete the following steps: +1. From the navigation pane of System i Navigator, click Schemas → BANK_SCHEMA , right-click Row Permissions , and select New → Row Permission , as shown in Figure 4-28. +Figure 4-28 Selecting new row permissions +
+ +Figure 4-28 Selecting new row permissions +
+2. The New Row Permission window opens, as shown in Figure 4-29. Enter the information regarding the row permissions on the CUSTOMERS table. This row permission defines what is established in the following policy: +-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all the rows. +-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table. +-Any other user profile cannot see any rows at all. +Select the Enabled option. Click OK . +Figure 4-29 New row permissions on the CUSTOMERS table +
+ +Figure 4-29 New row permissions on the CUSTOMERS table +
+3. Define the row permissions for the ACCOUNTS table. The New Row Permission window opens, as shown in Figure 4-30. Enter the information regarding the row permissions on the ACCOUNTS table. This row permission defines what is established in the following policy: +-User profiles that belong to DBE, ADMIN and TELLER group profiles can see all the rows. +-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table. +-Any other user profile cannot see any rows at all. +Select the Enabled option. Click OK . +Figure 4-30 New row permissions on the ACCOUNTS table +
+ +Figure 4-30 New row permissions on the ACCOUNTS table +
+4. Define the row permissions on the TRANSACTIONS table. The New Row Permission window opens, as shown in Figure 4-31. Enter the information regarding the row permissions on the TRANSACTIONS table. This row permission defines what is established in the following policy: +-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all of the rows. +-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table. +Note: You must join back to ACCOUNTS and then to CUSTOMERS by using a subquery to check whether the global variable matches CUSTOMER_LOGIN_ID. Also, if the row permission or column mask rule text references another table with RCAC defined, the RCAC for the referenced table is ignored. +-Any other user profile cannot see any rows at all. Select the Enabled option. Click OK . +Figure 4-31 New row permissions on the TRANSACTIONS table +
+ +Figure 4-31 New row permissions on the TRANSACTIONS table +
+5. To verify that the row permissions are enabled, from System i Navigator, click Row Permissions , as shown in Figure 4-32. The three row permissions are created and enabled. +Figure 4-32 List of row permissions on BANK_SCHEMA +
+ +Figure 4-32 List of row permissions on BANK_SCHEMA +
+4.3.6 Defining and creating column masks +This section defines the masks on the columns. Complete the following steps: +1. From the main navigation pane of System i Navigator, click Schemas → BANK_SCHEMA , right-click Column Masks , and select New → Column Mask , as shown in Figure 4-33. +Figure 4-33 Creating a column mask +
+ +Figure 4-33 Creating a column mask +
+2. In the New Column Mask window, which is shown in Figure 4-34, enter the following information: +-Select the CUSTOMERS table on which to create the column mask. +-Select the Column to mask; in this example, it is CUSTOMER_EMAIL. +-Define the masking logic depending on the rules that you want to enforce. In this example, either the ADMIN or CUSTOMER group profiles can see the entire email address; otherwise, it is masked to ****@****. +Select the Enabled option. Click OK . +Figure 4-34 Defining a column mask on the CUSTOMERS table +
+ +Figure 4-34 Defining a column mask on the CUSTOMERS table +
+3. Repeat steps 1 on page 58 and 2 to create column masks for the following columns: +-MASK_DRIVERS_LICENSE_ON_CUSTOMERS +-MASK_LOGIN_ID_ON_CUSTOMERS +-MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS +-MASK_ACCOUNT_NUMBER_ON_ACCOUNTS +-MASK_SECURITY_QUESTION_ON_CUSTOMERS +-MASK_TAX_ID_ON_CUSTOMERS +4. To verify that the column masks are enabled, from System i Navigator, click Column Masks , as shown in Figure 4-35. The seven column masks are created and enabled. +Figure 4-35 List of column masks on BANK_SCHEMA +
+ +Figure 4-35 List of column masks on BANK_SCHEMA +
+4.3.7 Restricting the inserting and updating of masked data +This step defines the check constraints that support the column masks to make sure that on INSERTS or UPDATES, data is not written with a masked value. For more information about the propagation of masked data, see 6.8, "Avoiding propagation of masked data" on page 108. +Complete the following steps: +1. Create a check constraint on the column CUSTOMER_EMAIL in the CUSTOMERS table. From the navigation pane of System i Navigator, right-click the CUSTOMERS table and select Definition , as shown in Figure 4-36. +Figure 4-36 Definition of the CUSTOMERS table +
+ +Figure 4-36 Definition of the CUSTOMERS table +
+2. From the CUSTOMERS definition window, click the Check Constraints tab and click Add , as shown in Figure 4-37. +Figure 4-37 Adding a check constraint +
+ +Figure 4-37 Adding a check constraint +
+3. The New Check Constraint window opens, as shown in Figure 4-38. Complete the following steps: +a. Select the CUSTOMER_EMAIL column. +b. Enter the check constraint condition. In this example, specify CUSTOMER_EMAIL to be different from ****@****, which is the mask value. +c. Select the On update violation, preserve column value option and click OK . +Figure 4-38 Specifying a new check constraint on the CUSTOMERS table +
+ +Figure 4-38 Specifying a new check constraint on the CUSTOMERS table +
+4. Figure 4-39 shows that there is now a check constraint on the CUSTOMERS table that prevents any masked data from being updated to the CUSTOMER_EMAIL column. +Figure 4-39 Check constraint on the CUSTOMERS table +
+ +Figure 4-39 Check constraint on the CUSTOMERS table +
+5. Create all the other check constraints that are associated to each of the masks on the CUSTOMERS table. After this is done, these constraints should look like the ones that are shown in Figure 4-40. +Figure 4-40 List of check constraints on the CUSTOMERS table +
+ +Figure 4-40 List of check constraints on the CUSTOMERS table +
+4.3.8 Activating row and column access control +You are now ready to activate RCAC on all three tables in this example. Complete the following steps: +1. Start by enabling RCAC on the CUSTOMERS table. From System i Navigator, right-click the CUSTOMERS table and select Definition . As shown in Figure 4-41, make sure that you select Row access control and Column access control . Click OK . +Figure 4-41 Enabling RCAC on the CUSTOMERS table +
+ +Figure 4-41 Enabling RCAC on the CUSTOMERS table +
+2. Enable RCAC on the ACCOUNTS table. Right-click the ACCOUNTS table and select Definition . As shown in Figure 4-42, make sure that you select Row access control and Column access control . Click OK . +Figure 4-42 Enabling RCAC on ACCOUNTS +
+ +Figure 4-42 Enabling RCAC on ACCOUNTS +
+3. Enable RCAC on the TRANSACTIONS table. Right-click the TRANSACTIONS table and select Definition . As shown in Figure 4-43, make sure that you select Row access control . Click OK . +Figure 4-43 Enabling RCAC on TRANSACTIONS +
+ +Figure 4-43 Enabling RCAC on TRANSACTIONS +
+4.3.9 Reviewing row permissions +This section displays all the row permissions after enabling RCAC. Complete the following steps: +1. From System i Navigator, click Row Permissions , as shown in Figure 4-44. Three additional Row Permissions are added (QIBM_DEFAULT*). There is one per each row permission. +Figure 4-44 Row permissions after enabling RCAC +
+ +Figure 4-44 Row permissions after enabling RCAC +
+2. Look at one of the row permission definitions by right-clicking it and selecting Definition , as shown in Figure 4-45. +Figure 4-45 Selecting row permission definition +
+ +Figure 4-45 Selecting row permission definition +
+3. A window opens, as shown in Figure 4-46. Take note of the nonsensical search condition (0=1) of the QIBM_DEFAULT row permission. This permission is ORed with all of the others and it ensures that if someone does not meet any of the criteria from the row permission then this condition is tested, and because it is false the access is denied. +Figure 4-46 Search condition of the QIBM_DEFAULT row permission +
+ +Figure 4-46 Search condition of the QIBM_DEFAULT row permission +
+4.3.10 Demonstrating data access with RCAC +You are now ready to test the RCAC definitions. Run the following SQL statements with each type of user (DBE, SECURITY, TELLER, ADMIN, and WEBUSER): +GLYPH A SELECT statement that returns the SESSION_USER. +GLYPH A SELECT statement that counts the customers from the CUSTOMERS table. There are 90 customers in the CUSTOMERS table. +GLYPH A simple SELECT statement that returns the following output from the CUSTOMERS table ordered by customer_name: +-customer_id +-customer_name +-customer_email +-customer_tax_id +-customer_drivers_license_number +Data access for a DBE user with RCAC +To test a DBE (MCAIN) user, complete the following steps: +1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-47. In this example, MCAIN is the DBE user. +Figure 4-47 DBE session user +2. The number of rows that the DBE user MCAIN can see is shown in Figure 4-48. +Figure 4-48 Number of rows that DBE user can see in the CUSTOMERS table +3. The result of the third SQL statement is shown in Figure 4-49. Note the masked columns. User MCAIN can see all the rows in the CUSTOMERS table, but there are some columns where the result is masked. +Figure 4-49 SQL statement that is run by the DBE user with masked columns +Data access for SECURITY user with RCAC +To test a SECURITY user, complete the following steps: +1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-50. In this example, SECURITY is the security officer. +Figure 4-50 SECURITY session user +
+ +Figure 4-50 SECURITY session user +
+2. The number of rows in the CUSTOMERS table that the security officer can see is shown in Figure 4-51. The security officer cannot see any data at all. +Figure 4-51 Number of rows that the security officer can see in the CUSTOMERS table +3. The result of the third SQL statement is shown in Figure 4-52. Note the empty set that is returned to the security officer. +Figure 4-52 SQL statement that is run by the SECURITY user - no results +
+ +Figure 4-52 SQL statement that is run by the SECURITY user - no results +
+Data access for TELLER user with RCAC +To test a Teller (TQSPENCER) user, complete the following steps: +1. Confirm that the TELLER user is the user of the session by running the first SQL statement, as shown in Figure 4-53. In this example, TQSPENCER is a TELLER user. +Figure 4-53 TELLER session user +2. The number of rows in the CUSTOMERS table that the TELLER user can see is shown in Figure 4-54. The TELLER user can see all the rows. +Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table +
+ +Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table +
+3. The result of the third SQL statement is shown in Figure 4-55. Note the masked columns. The TELLER user, TQSPENCER, can see all the rows, but there are some columns where the result is masked. +Figure 4-55 SQL statement that is run by the TELLER user with masked columns +Data access for ADMIN user with RCAC +To test an ADMIN (VGLUCCHESS) user, complete the following steps: +1. Confirm that the ADMIN user is the user of the session by running the first SQL statement, as shown in Figure 4-56. In this example, VGLUCCHESS is an ADMIN user. +Figure 4-56 ADMIN session user +
+ +Figure 4-56 ADMIN session user +
+2. The number of rows that the ADMIN user can see is shown in Figure 4-57. The ADMIN user can see all the rows. +Figure 4-57 Number of rows that the ADMIN can see in the CUSTOMERS table +3. The result of the third SQL statement is shown in Figure 4-58. There are no masked columns. +Figure 4-58 SQL statement that is run by the ADMIN user - no masked columns +Data access for WEBUSER user with RCAC +To test a CUSTOMERS (WEBUSER) user that accesses the database by using the web application, complete the following steps: +1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-59. In this example, WEBUSER is a CUSTOMER user. +Figure 4-59 WEBUSER session user +
+ +Figure 4-59 WEBUSER session user +
+2. A global variable (CUSTOMER_LOGIN_ID) is set by the web application and then is used to check the row permissions. Figure 4-60 shows setting the global variable by using the customer login ID. +Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID +
+ +Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID +
+3. Verify that the global variable was set with the correct value by clicking the Global Variable tab, as shown in Figure 4-61. +Figure 4-61 Viewing the global variable value +
+ +Figure 4-61 Viewing the global variable value +
+4. The number of rows that the WEBUSER can see is shown in Figure 4-62. This user can see only the one row that belongs to his web-based user ID. +Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table +
+ +Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table +
+5. The result of the third SQL statement is shown in Figure 4-63. There are no masked columns, and the user can see only one row, which is the user's own row. +Figure 4-63 SQL statement that is run by WEBUSER - no masked columns +Other examples of data access with RCAC +To run an SQL statement that lists all the accounts and current balance by customer, complete the following steps: +1. Run the SQL statement that is shown in Figure 4-64 using the WEBUSER user profile. The SQL statement has no WHERE clause, but the WEBUSER can see only his accounts. +Figure 4-64 List of accounts and current balance by customer using the WEBUSER user profile +2. Figure 4-65 shows running a more complex SQL statement that calculates transaction total by account for year and quarter. Run this statement using the WEBUSER profile. The SQL statement has no WHERE clause, but the WEBUSER user can see only his transactions. +Figure 4-65 Calculate transaction total by account for year and quarter using the WEBUSER profile +3. Run the same SQL statement that lists the accounts and current balance by customer, but use a TELLER user profile. The result of this SQL statement is shown in Figure 4-66. The TELLER user can see all the rows in the CUSTOMERS table. +Figure 4-66 List of accounts and current balance by customer using a TELLER user profile +4.3.11 Query implementation with RCAC activated +This section looks at some other interesting information that is related to RCAC by comparing the access plans of the same SQL statement without RCAC and with RCAC. This example uses Visual Explain and runs an SQL statement that lists the accounts and current balance by customer. +Complete the following steps: +1. Figure 4-67 shows the SQL statement in Visual Explain ran with no RCAC. The implementation of the SQL statement is a two-way join, which is exactly what the SQL statement is doing. +Figure 4-67 Visual Explain with no RCAC enabled +
+ +Figure 4-67 Visual Explain with no RCAC enabled +
+2. Figure 4-68 shows the Visual Explain of the same SQL statement, but with RCAC enabled. It is clear that the implementation of the SQL statement is more complex because the row permission rule becomes part of the WHERE clause. +Figure 4-68 Visual Explain with RCAC enabled +
+ +Figure 4-68 Visual Explain with RCAC enabled +
+3. Compare the advised indexes that are provided by the Optimizer without RCAC and with RCAC enabled. Figure 4-69 shows the index advice for the SQL statement without RCAC enabled. The index being advised is for the ORDER BY clause. +Figure 4-69 Index advice with no RCAC +
+ +Figure 4-69 Index advice with no RCAC +
+4. Now, look at the advised indexes with RCAC enabled. As shown in Figure 4-70, there is an additional index being advised, which is basically for the row permission rule. For more information, see 6.4.2, "Index advisor" on page 99. +Figure 4-70 Index advice with RCAC enabled +
+ +Figure 4-70 Index advice with RCAC enabled +
+
+ +
+Chapter 5. +5 +RCAC and non-SQL interfaces +A benefit of Row and Column Access Control (RCAC) is that its security controls are enforced across all the interfaces that access DB2 for i because the security rules are defined and enforced at the database level. The examples that are shown in this paper focus on SQL-based access, but row permissions and column masks also are enforced for non-SQL interfaces, such as native record-level access in RPG and COBOL programs and CL commands, such as Display Physical File Member ( DSPPFM ) and Copy File ( CPYF ). +This consistent enforcement across all interfaces is a good thing, but there are some nuances and restrictions as a result of applying an SQL-based technology such as RCAC to non-SQL interfaces. These considerations are described in this chapter. +The following topics are covered in this chapter: +GLYPH Unsupported interfaces +GLYPH Native query result differences +GLYPH Accidental updates with masked values +GLYPH System CL commands considerations +5.1 Unsupported interfaces +It is not possible to create a row permission or column mask on a distributed table or a program-described file. +After a row permission or column mask is added to a table, there are some data access requests that no longer work. An attempt to open or query a table with activated RCAC controls involving any of the following scenarios is rejected with the CPD43A4 error message: +GLYPH A logical file with multiple formats if the open attempt requests more than one format. +GLYPH A table or query that specifies an ICU 2.6.1 sort sequence. +GLYPH A table with read triggers. +This unsupported interface error occurs when a table with RCAC controls is accessed, not when the RCAC control is created and activated. +For example, assume that there is a physical file, PF1, which is referenced by a single format logical file (LFS) and a multi-format logical file (LFM). A row permission is successfully created and activated for PF1. 
Any application that accesses PF1 directly or LFS continues to work without any issues. However, any application that opens LFM with multiple formats receives an error on the open attempt after the row permission is activated for PF1. +Important: This potential runtime error places a heavy emphasis on a comprehensive testing plan to ensure that all programs are tested. If testing uncovers an unsupported interface, then you must investigate whether the application can be rewritten to use a data access interface that is supported by RCAC. +5.2 Native query result differences +The SQL Query Engine (SQE) is the only engine that is enhanced by IBM to enforce RCAC controls on query requests. In order for native query requests to work with RCAC, these native query requests are now processed by SQE instead of the Classic Query Engine (CQE). Native query requests can consist of the following items: +GLYPH Query/400 +GLYPH QQQQRY API +GLYPH Open Query File ( OPNQRYF ) command +GLYPH Run Query ( RUNQRY ) command +GLYPH Native open (RPG, COBOL, OPNDBF, and so on) of an SQL view +Legacy queries that have been running without any issues for many years and over many IBM i releases are now processed by a different query engine. As a result, the runtime behavior and results that are returned can be different for native query requests with RCAC enabled. The OPNQRYF command and Query/400 run with SQE by default. 
+The following list documents some of the query output differences that can occur when native query requests are processed by CQE: +GLYPH Different ordering in the result set +GLYPH Different values for null columns or columns with errors +GLYPH Suppression of some mapping error messages +GLYPH Loss of RRN positioning capabilities +GLYPH Duplicate key processing behavior differences +GLYPH Missing key feedback +For a list of the differences and additional details, see the IBM i Memo to Users Version 7.2 , found at: +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm +In addition, the performance of a native query with SQE can be different. It is possible that a new index or keyed logical file might need to be created to improve the performance. +Important: Based on the potential impacts of query result set and performance differences, you should perform extensive functional testing and performance benchmarking of applications and reports that use native query interfaces. +5.3 Accidental updates with masked values +The masked values that are returned by a column mask can potentially cause the original data value to be accidentally overwritten, especially with applications using native record-level access. +For example, consider a table containing three columns of first name, last name, and tax ID that is read by an RPG program. The user running the program is not authorized to see the tax ID value, so a masked value (*****3333) is written into the program's record buffer, as shown Figure 5-1. +In this example, the application reads the data for an update to correct the misspelling of the last name. The last name value is changed to Smith in the buffer. Now, a WRITE request is issued by the program, which uses the contents of the record buffer to update the row in the underlying DB2 table. Unfortunately, the record buffer still contains a masked value for the tax ID, so the tax ID value in the table is accidentally set to the masked value. 
+Figure 5-1 Accidental update with masked values scenario +
+ +Figure 5-1 Accidental update with masked values scenario +
+Obviously, careful planning and testing should be exercised to avoid accidental updates with masked values. +DB2 for i also enhanced its check constraint support in the IBM i 7.2 release with a new ON UPDATE clause that allows the existing value to be preserved when a masked value is detected by a check constraint. Details about how to employ this new check constraint support can be found in 6.8.1, "Check constraint solution" on page 108. +5.4 System CL commands considerations +As stated earlier, RCAC controls are enforced on all data access interfaces. This enforcement is not limited to programmatic interfaces; it also includes system CL commands that read and insert data, such as the Create Duplicate Object ( CRTDUPOBJ ) and Start DFU ( STRDFU ) CL commands. This section documents the behavior of the Create Duplicate Object ( CRTDUPOBJ ), Copy File ( CPYF ), and Copy Library ( CPYLIB ) CL commands with RCAC. +5.4.1 Create Duplicate Object (CRTDUPOBJ) command +The CRTDUPOBJ command is enhanced with a new Access Control ( ACCCTL ) parameter in the IBM i 7.2 release to copy RCAC controls to the new object being created. Row permissions and column masks are copied to the new object by default because the default value for the ACCCTL parameter is *ALL . +If the invoker of the CRTDUPOBJ command asks for data to be copied with a value of *YES for the DATA parameter, the value of the ACCCTL parameter must be *ALL . If not, the command invocation receives an error. +When data is copied to the duplicated object with the DATA parameter, all rows and unmasked column values are copied into the new object, even if the command invoker is not authorized to view all rows or certain column values. This behavior occurs because the RCAC controls also are copied to the new object. The copied RCAC controls enforce that only authorized users are allowed to view row and column values in the newly duplicated object. 
+5.4.2 Copy File (CPYF) command +The CPYF command copies only data, so there is no new parameter to copy RCAC controls to the target table. Therefore, if CPYF is used to create a target table, there are no RCAC controls placed on the target table. +When RCAC controls are in place on the source table, the CPYF command is limited to reading rows and column values that are based on the invoker of the CPYF command. If a user is authorized to see all rows and column values, then all rows and unmasked column values are copied to the target table (assuming no RCAC controls are on the target table). If a user without full access runs the CPYF command, the CPYF command can copy only a subset of the rows into the target table. In addition, if that user can view only masked column values, then masked values are copied into the target table. This also applies to the Copy to Import File ( CPYTOIMPF ) command. +If the target table has RCAC controls defined and activated, then the CPYF command is allowed only to add or replace rows in the target table based on the RCAC controls. If CPYF tries to add a row to the target table that the command invoker is not allowed to view according to the target RCAC controls, then an error is received. +5.4.3 Copy Library (CPYLIB) command +The CPYLIB command is enhanced with the same Access Control ( ACCCTL ) parameter as the CRTDUPOBJ command in the IBM i 7.2 release (see 5.4.1, "Create Duplicate Object (CRTDUPOBJ) command" on page 82). Row permissions and column masks are copied to the new object in the new library by default because the default value for the ACCCTL parameter is *ALL . +
+ +
+Chapter 6. +Additional considerations +This chapter covers additional considerations that must be taken into account when implementing Row and Column Access Control (RCAC), including the following functions: +GLYPH Timing of column masking +GLYPH Data movement +GLYPH Joins +GLYPH Views +GLYPH Materialized query tables +GLYPH Index advisor +GLYPH Monitoring, analysis, and debugging +GLYPH Performance and scalability +The following topics are covered in this chapter: +GLYPH Timing of column masking +GLYPH RCAC effects on data movement +GLYPH RCAC effects on joins +GLYPH Monitoring, analyzing, and debugging with RCAC +GLYPH Views, materialized query tables, and query rewrite with RCAC +GLYPH RCAC effects on performance and scalability +GLYPH Exclusive lock to implement RCAC (availability issues) +GLYPH Avoiding propagation of masked data +GLYPH Triggers and functions (SECURED) +GLYPH RCAC is only one part of the solution +6 +6.1 Timing of column masking +An important design and implementation consideration is the fact that RCAC column masking occurs after all of the query processing is complete, which means that the query results are not at all based on the masked values. Any local selection, joining, grouping, or ordering operations are based on the unmasked column values. Only the final result set is the target of the masking. +An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values. +SELECT +FROM GROUP BY ORDER BY +Without RCAC Masking +With RCAC Masking + + +CREDIT CARD NUMBER _ _TOTAL +3785 0000 0000 1234233.50 +3785 1111 1111 1234105.10 +3785 2222 2222 1234300 00 300.00 +3785 3333 3333 12341,775.00 +5466 4444 4444 1234601.70 +5466 5555 5555 123437.80 +5466 6666 6666 1234490.45 +6011 7777 7777 12341005.00 +6011 8888 8888 1234750.33 +6011 9999 9999 000110.00 +
+Figure 6-1 Timing of column masking + + + +CREDIT CARD NUMBER _ _TOTAL +**** **** **** 1234233.50 +**** **** **** 1234105.10 +**** **** **** 1234300 00 300.00 +**** **** **** 12341,775.00 +**** **** **** 1234601.70 +**** **** **** 123437.80 +**** **** **** 1234490.45 +**** **** **** 1234 12341005.00 +**** **** **** 1234750.33 +**** **** **** 000110.00 +
Figure 6-1 Timing of column masking
+CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS +CREDIT_CARD_NUMBER +CREDIT_CARD_NUMBER; +Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. This difference in behavior is shown in Figure 6-2. +Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask. +Figure 6-2 Masking differences between Fieldproc and RCAC +
+ +Figure 6-2 Masking differences between Fieldproc and RCAC +
+6.2 RCAC effects on data movement +As described earlier and shown in Figure 6-3, RCAC is applied pervasively regardless of the data access programming interface, SQL statement, or IBM i command. The effects of RCAC on data movement scenarios can be profound and possibly problematic. It is important to understand these effects and make the appropriate adjustments to avoid incorrect results or data loss. +Figure 6-3 RCAC and data movement +
+ +Figure 6-3 RCAC and data movement +
+The "user" that is running the data movement application or process, whether it be a high availability (HA) scenario, an extract, transform, load (ETL) scenario, or just copying data from one file or table to another one, must have permission to all the source rows without masking, and not be restricted from putting rows into the target. Allowing the data movement application or process to bypass the RCAC rules must be based on a clear and concise understanding of the organization's object security and data access policy. Proper design, implementation, and testing are critical success factors when applying RCAC. +Important: RCAC is applied to the table or physical file access. It is not applied to the journal receiver access. Any and all database transactions are represented in the journal regardless of RCAC row permissions and column masks. This makes it essential that IBM i security is used to ensure that only authorized personnel have access to the journaled data. +This section covers in detail the following three examples: +GLYPH Effects when RCAC is defined on the source table +GLYPH Effects when RCAC is defined on the target table +GLYPH Effects when RCAC is defined on both source and target tables +6.2.1 Effects when RCAC is defined on the source table +Example 6-1 shows a simple example that illustrates the effect of RCAC as defined on the source table. +Example 6-1 INSERT INTO TARGET statement +INSERT INTO TARGET (SELECT * FROM SOURCE); +For example, given a "source" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the RCAC rules applied. This reduced and modified result set is inserted into the "target" table even though the query is defined as returning all rows and all columns. Instead of seven rows that are selected from the source, only three rows are returned and placed into the target, as shown in Figure 6-4. 
+Figure 6-4 RCAC effects on data movement from SOURCE +
+ +Figure 6-4 RCAC effects on data movement from SOURCE +
+6.2.2 Effects when RCAC is defined on the target table +Example 6-2 shows a simple example that illustrates the effect of RCAC as defined on the target table. +Example 6-2 INSERT INTO TARGET statement +INSERT INTO TARGET (SELECT * FROM SOURCE); +Given a "target" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that represents all the rows and columns. The seven row result set is inserted into the "target", and the RCAC row permission causes an error to be returned, as shown in Figure 6-5. The source rows where NAME = 'CAIN' do not satisfy the target table's permission, and therefore cannot be inserted. In other words, you are inserting data that you cannot read. +Figure 6-5 RCAC effects on data movement on TARGET +
+ +Figure 6-5 RCAC effects on data movement on TARGET +
+6.2.3 Effects when RCAC is defined on both source and target tables +Example 6-3 shows a simple example that illustrates the effect of RCAC as defined on both the source and the target tables. +Example 6-3 INSERT INTO TARGET statement +INSERT INTO TARGET (SELECT * FROM SOURCE); +Given a "source" table and a "target" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the RCAC rules applied. This reduced and modified result set is inserted into the "target" table even though the query is defined as returning all rows and all columns. Instead of seven rows that are selected from the source, only three rows are returned. +Although the source rows where NAME <> 'CAIN' do satisfy the target table's permission, the AMOUNT column value of 999.99 represents masked data and therefore cannot be inserted. An error is returned indicating the failure, as shown in Figure 6-6. In this scenario, DB2 is protecting against an overt attempt to insert masked data. +Figure 6-6 RCAC effects on data movement on SOURCE and TARGET +
+ +Figure 6-6 RCAC effects on data movement on SOURCE and TARGET +
+6.3 RCAC effects on joins +As mentioned previously, a fundamental concept of row permission is that it defines a logical subset of rows that a user or group of users is permitted to access and use. This subset becomes the new basis of any query against the table that has RCAC enabled. +Note: Thinking of the row permission as defining a virtual set of rows that can be operated on is the secret to understanding the effect of RCAC on any join operation. +As shown in Figure 6-7, there are two different sets, set A and set B. However, set B has a row permission that subsets the rows that a user can see. +Figure 6-7 Set A and set B with row permissions +
+ +Figure 6-7 Set A and set B with row permissions +
+6.3.1 Inner joins +Inner join defines the intersection of two data sets. For a row to be returned from the inner join query, it must appear in both sets, as shown in Figure 6-8. +Figure 6-8 Inner join without RCAC permission +
+ +Figure 6-8 Inner join without RCAC permission +
+Given that row permission serves to eliminate logically rows from one or more sets, the result set from an inner join (and a subquery) can be different when RCAC is applied. RCAC can reduce the number of rows that are permitted to be accessed by the join, as shown in Figure 6-9. +Effect of column masks on inner joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set. +Figure 6-9 Inner join with RCAC permission +
+ +Figure 6-9 Inner join with RCAC permission +
+6.3.2 Outer joins +Outer joins preserve one or both sides of two data sets. A row can be returned from the outer join query if it appears in the primary set (LEFT, RIGHT, or both in the case of FULL), as shown in Figure 6-10. Column values from the secondary set are returned if the row has a match in the primary set. Otherwise, NULL is returned for the column value by default. +Figure 6-10 Outer join without RCAC permission +
+ +Figure 6-10 Outer join without RCAC permission +
+Given that row permission serves to logically eliminate rows from one or more sets, more column values that are returned from the secondary table in an outer join can be NULL when RCAC is applied, as shown in Figure 6-11. +Effect of column masks on outer joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set. +Figure 6-11 Outer join with RCAC permission +
+ +Figure 6-11 Outer join with RCAC permission +
+6.3.3 Exception joins +Exception joins preserve one side of two data sets. A row can be returned from the exception join query if it appears in the primary set (LEFT or RIGHT) and the row does not appear in the secondary set, as shown in Figure 6-12. Column values from the secondary set are returned as NULL by default. +Figure 6-12 Exception join without RCAC permission +
+ +Figure 6-12 Exception join without RCAC permission +
+Given that row permission serves to eliminate logically rows from one or more sets, more rows can appear to be exceptions when RCAC is applied, as shown in Figure 6-13. Also, because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set. +Figure 6-13 Exception join with RCAC permission +
+ +Figure 6-13 Exception join with RCAC permission +
+6.4 Monitoring, analyzing, and debugging with RCAC +It is assumed (and it is a critical success factor) that the database engineer or application developer has a thorough understanding of the DB2 for i Query Optimizer, Database Engine, and all the associated tools and techniques. +The monitoring, analyzing, and debugging process basically stays the same when RCAC row permissions or column masks are in place, with a few important differences: +GLYPH The underlying data access plan can be different and more complex based on the rule text. +GLYPH The database results can be reduced or modified based on the rule text and user profile. +GLYPH The run time of the request can be affected either positively or negatively based on the rule text. +GLYPH For high-level language record level access, query plans must be considered, and not just program code. +During analyzing and debugging, it is important to account for all of the RCAC definitions for each table or file to understand the logic and corresponding work that is associated with processing the row permissions and column masks. It is also important to realize that, depending on the user profile in effect at run time, the database actions and query results can be different. +RCAC is designed and implemented to be transparent to the user. It is possible for user "Mike" and user "Hernando" to run the exact same query, against the exact same data on the exact same system, and get different result sets. There is no error, no warning, and no indication that RCAC reduced or modified the respective answers that are returned. Furthermore, it is also likely that user "Mike" and user "Hernando" have different query run times even though it appears that everything is the same for both users. The actual query plan contains the RCAC logic, and this additional code path can alter the amount of work that is needed to produce results, based on the user running the query. 
+When monitoring, analyzing, and debugging a database process when RCAC is enabled, it is critical to keep as many of the "variables" the same as possible. Use a good scientific process. For example, when re-creating a problem situation, running under the same user profile with the same data and under the same conditions is almost mandatory. Otherwise, the database behavior and query results can be different. +Successfully performing monitoring, analysis, and debugging when RCAC is enabled likely involves changes in the security and data access policies of the organization, and requires new responsibilities, authority, and oversight within the data-centric application development community. As such, establishing and staffing the position of "database engineer" becomes even more important. +6.4.1 Query monitoring and analysis tools +When monitoring and collecting metrics on database requests, DB2 for i provides additional information that indicates row permissions or column masks are being applied. This information is integrated and part of the standard tools, such as Visual Explain, SQL Plan Cache Snapshot, and SQL Performance Monitor. +Figure 6-14 shows how Visual Explain externalizes RCAC. +Figure 6-14 Visual Explain indicating that RCAC is applied +
+ +Figure 6-14 Visual Explain indicating that RCAC is applied +
+Figure 6-15 shows the main dashboard of an SQL Performance Monitor. Click Summary . +Figure 6-15 SQL Performance Monitor +
+ +Figure 6-15 SQL Performance Monitor +
+Figure 6-16 shows the summary of an SQL Performance Monitor with an indication that RCAC is applied. +Figure 6-16 SQL Performance Monitor indicating that RCAC is applied +
+ +Figure 6-16 SQL Performance Monitor indicating that RCAC is applied +
+Figure 6-17 shows the statements of an SQL Performance Monitor and how RCAC is externalized. +Figure 6-17 SQL Performance Monitor showing statements and RCAC +
+ +Figure 6-17 SQL Performance Monitor showing statements and RCAC +
+When implementing RCAC as part of a comprehensive and pervasive data access control initiative, consider that the database monitoring and analysis tools can collect literal values that are passed as part of SQL statements. These literal values can be viewed as part of the information collected. If any of the literals are based on or are used with masked columns, it is important to review the database engineer's policy for viewing these data elements. For example, suppose that column CUSTOMER_TAX_ID is deemed masked for the database engineer and the CUSTOMER_TAX_ID column is used in a predicate as follows: +WHERE CUSTOMER_TAX_ID = '123-45-7890' +The literal value of '123-45-7890' is visible to the analyst, effectively exposing sensitive information. If this is not acceptable, you must implement the SYSPROC.SET_COLUMN_ATTRIBUTE procedure. +The SET_COLUMN_ATTRIBUTE procedure sets the SECURE attribute for a column so that variable values that are used for the column cannot be seen in the SQL Performance Monitor, SQL Plan Cache Snapshot, or Visual Explain. +6.4.2 Index advisor +Because the RCAC rule text can be almost any valid SQL logic, including local selection predicates, join conditions, and subqueries, the standard query tuning techniques still apply. Without a doubt, a proper and adequate indexing strategy is a good starting point. +The index advisor is not specifically enhanced for RCAC, but because the rule text is a fully integrated part of the query plan, any opportunities for indexing are advised based on the current Query Optimizer functionality. If an index is advised because of the RCAC rule text logic, there is no RCAC reason code provided. Analyzing the query plan and the RCAC rule text provides the understanding as to why the index is being advised. +For example, the query that is shown in Figure 6-18 produces index advice for the user's predicate and the RCAC predicate. +Figure 6-18 Index advice and RCAC +
+ +Figure 6-18 Index advice and RCAC +
+In Figure 6-19, index advisor is showing an index for the ACCOUNTS and CUSTOMERS tables based on the RCAC rule text. +Figure 6-19 Index advisor based on the RCAC rule +
+ +Figure 6-19 Index advisor based on the RCAC rule +
+For more information about creating and using indexes, see IBM DB2 for i indexing methods and strategies , found at: +http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i_in dexing_methods_strategies +6.4.3 Metadata using catalogs +To make the discovery and identification of RCAC row permissions and column masks programmatically, query the QSYS2.SYSCONTROLS catalog view or the QSYS2.SYSCONTROLSDEP catalog view directly. Otherwise, the System i Navigator Database graphical interface can be used interactively. +Figure 6-20 shows the QSYS2.SYSCONTROLS catalog view. +Figure 6-20 RCAC and catalogs +The SYSCONTROLS catalog view contains the following columns: +GLYPH COLUMN_NAME +GLYPH CONTROL_TYPE +GLYPH CREATE_TIME +GLYPH ENABLE +GLYPH ENFORCED +GLYPH ASP_NUMBER +GLYPH IMPLICIT +GLYPH LABEL +GLYPH LAST_ALTERED +GLYPH LONG_COMMENT +GLYPH RCAC_NAME +GLYPH RCAC_OWNER +GLYPH RCAC_SCHEMA +GLYPH RULETEXT +GLYPH SYSTEM_COLUMN_NAME +GLYPH SYSTEM_TABLE_NAME +GLYPH SYSTEM_TABLE_SCHEMA +GLYPH TABLE_NAME +GLYPH TABLE_SCHEMA +GLYPH TBCORRELATION +The SYSCONTROLSDEP catalog view contains the following columns: +GLYPH COLUMN_NAME +GLYPH CONTROL_TYPE +GLYPH IASP_NUMBER +GLYPH OBJECT_NAME +GLYPH OBJECT_SCHEMA +GLYPH OBJECT_TYPE +GLYPH PARM_SIGNATURE +GLYPH RCAC_NAME +GLYPH RCAC_SCHEMA +GLYPH SYSTEM_TABLE_NAME +GLYPH SYSTEM_TABLE_SCHEMA +For more information, see the IBM i 7.2 DB2 for i SQL Reference Guide , found at: +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?lang =en +6.5 Views, materialized query tables, and query rewrite with RCAC +This section covers the implications to views, materialized query tables (MQTs), and query rewrite when RCAC is activated on a table. +6.5.1 Views +Any access to an SQL view that is over one or more tables that have RCAC also have those row permissions and column masking rules applied. 
If an SQL view has predicates, those are logically ANDed with any search condition that is specified in the permissions that are defined on the underlying tables. The view does not have to project the columns that are referenced by the permissions. Figure 6-21 shows an example of a view definition and user query. +Figure 6-21 View definition and user query +
+ +Figure 6-21 View definition and user query +
+What the query optimizer plans for and what the database engine runs is shown in the Figure 6-22. +Figure 6-22 Query rewrite with RCAC +
+ +Figure 6-22 Query rewrite with RCAC +
+6.5.2 Materialized query tables +When the query to populate a materialized query table (MQT) is run by the system on either the create table or a refresh table, and one or more source tables have RCAC defined, the row permissions and column masks are ignored. This means that the MQT has all of the data. +Because the MQT is a copy of the base table data, when a permission is created on the base table, all the related MQTs are altered to have a default row permission. This default permission prevents any of the rows from being directly queried. +When a query implicitly uses an MQT, the underlying row permissions and column masks are built into the query that uses the MQT. In order for the MQT to be used for optimization, the MQT must include any columns that are used by the row permissions and column masks. +The following example illustrates this scenario: +1. Create schema and tables: +CREATE SCHEMA Schema1; +CREATE TABLE Schema1.employee(userID varchar(128), LocationID integer, Regionid integer); +CREATE TABLE Schema1.Sales (INVOICE INTEGER NOT NULL, SALEAMT DECIMAL(5,2), TAXAMT DECIMAL(5,2), LOCATIONID INTEGER, REGIONID INTEGER); +2. Create a row permission that allows the employees to see only rows from the region they work in: +/* Create permission that only allows the employees to see rows from the region they work in */ CREATE PERMISSION Schema1.Sales_PERM1 ON schema1.sales FOR ROWS WHERE CURRENT_USER in (SELECT userId FROM schema1.employee E WHERE e.regionid = regionid) ENFORCED FOR ALL ACCESS ENABLE; +3. Create an MQT to summarize sales by location: +-- Create MQT to summarize sales by location -- This has all of the data. The schema1.sales_perm1 predicate was not applied CREATE TABLE Schema1.Location_Sales_MQT as AS (SELECT LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED +MAINTAINED BY USER; +4. 
Populate the MQT (permission is not applied): +/* Populate the MQT - Permission not applied here */ REFRESH TABLE Schema1.Location_Sales_MQT +The following query matches Location_Sales_MQT, but it cannot be used because it does not have column regionid, which is needed by the schema1.sales_PERM1 permission: +SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid; +5. Create an MQT to summarize by region and location: +-- MQT to summarize by region and location Create table schema1.Region_Location_Sales_MQT as AS (SELECT REGIONID, LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY REGIONID, LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED MAINTAINED BY USER; +6. Populate the Region_location_Sales_MQT (permission not applied): +/* Populate the Region_location_Sales_MQT - Permission not applied here */ Refresh table schema1.Region_Location_Sales_MQT +The following query can use the Region_location_SALES_MQT because it has REGIONID, which is required for the schema1.sales_PERM1 permission: +SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid; +This example has the following additional implications: +GLYPH Users must be prevented from explicitly querying the MQT or a view that is created over it. Those two cases bypass the row permission and column mask rules from the underlying tables. +GLYPH If the user writes code to update incrementally an MQT, that code must be run from a user that has permission to view all of the rows and all columns in their unmasked state. Otherwise, the MQT contents are not complete and queries that implicitly use the MQT might get wrong results. +GLYPH To prevent this, a check constraint can be created to cause an error if masked data was inserted into the MQT. +6.5.3 Query rewrite +Query rewrite is a technique that the optimizer can use to change the original request to improve performance. 
+For example, a query that references Table1 might be rewritten to access an MQT over Table1, or it might also be optimized to access only the fields in an index that is defined over Table1 and avoid touching Table1. With RCAC, defining these rewrites can still occur, but the MQT or index also must include all columns that are needed by the row permissions or column masks that are defined on Table1. +As part of adding RCAC, the impact to these potentially significant performance optimizations must be considered. Usage of MQTs or index-only access might be reduced or eliminated by enabling RCAC. +6.6 RCAC effects on performance and scalability +As with any discussion that is related to performance and scalability, nothing is certain or guaranteed. There are always many variables that are involved. First, a good foundation of knowledge and skill is required to appreciate fully what is occurring when a database request is handled within an RCAC enabled environment. Implementing the row permission or column masks involves the query optimizer and database engine. The process that identifies the rows that you have permission to access is considered a "query", and as such a query plan must be formulated. In the case of SQL requests, the RCAC portion of the query is combined with the user's query, much like a query referencing a view. +For native record level access, this RCAC "query" is also built and used to test the permission. When a file is opened, the RCAC rule text logic is included, optimized, and run as part of the native read, write, update, or delete operation. The amount of work (and time) required to identify the record based on the user's permission is directly related to the complexity and depth of the logic that is needed to identify the records that can be returned. +A simple example to illustrate this concept is a random read using a keyed logical file (that is, an index). 
In its purest form, a random read uses two data access methods: index probe (find the key and RRN) and table probe (find the record using RRN). If the RCAC rule text specifies five nested subqueries to determine whether the user has access to the record, this logic must be added to the path. The subquery processing now becomes part of the original "random read" request. Instead of two simple I/Os to retrieve the record, there can be a minimum of 12 I/Os to retrieve the same record. These I/Os can be done with a result of "not found" if the user is not entitled to any of the records. +For programs that access records sequentially, in or out of key order, the added RCAC logic can have a profound effect on the performance and scalability. Reading the "next record" in order is no longer a simple matter of positioning to the next available key, as shown in Figure 6-23. +Figure 6-23 Native record access with no RCAC +
+ +Figure 6-23 Native record access with no RCAC +
+Before the record, as identified by the key, is considered available, the RCAC logic must be run. If the record is rejected by RCAC, the next record in sequence that is permissible must be identified. This spinning through the records can take a long time and uses many resources, as shown in Figure 6-24. +Figure 6-24 Native record level access with RCAC +
+ +Figure 6-24 Native record level access with RCAC +
+After the row permissions and column masks are designed and implemented, adequate performance and scalability testing are recommended. +6.7 Exclusive lock to implement RCAC (availability issues) +When defining permissions or enabling RCAC, an exclusive lock on the base table is obtained. The impact to other applications depends on the order of create permission and the alter table to activate RCAC. +Consider the following scenarios: +GLYPH Scenario 1: Adding permissions and RCAC is not enabled on the table: +-Job 1 reading data from the table (open for input) holds a *SHRRD on the member and a *SHRRD on the data. +-Job 2 adding, updating, or deleting rows from table (open for output) holds a *SHRRD on the member and a *SHRUPD on the data. +-Job 4 allocates the object and gets a *SHRRD on the file and a *EXCLRD on the data. +-Job 3 attempts to add a permission to the table. Permission is added and the pseudo-closed cursors for Job1 and Job 2 are closed. Job 4 still holds the *SHRRD on the file and *EXCLRD on the data. +The net result from Scenario 1 is that you can add permissions without having to end the applications that are reading the base table. +GLYPH Scenario 2: Altering a table to activate RCAC requires that all applications using the table be ended. The alter table requires exclusive use of the table. +GLYPH Scenario 3: Altering the table to activate RCAC before the permissions are added. The alter table requires exclusive use of the table, as in scenario 2. All applications must be ended to perform this alter. After the alter is complete, any applications trying to read data do not get any results, and attempts to insert new rows returns the following message: +SQ20471] INSERT or UPDATE does not satisfy row permissions. +To create a permission in this case requires that you end all the applications, unlike scenario 1 where permissions can be added while the applications were active. 
In this case, the applications must be ended to run the create permission. +6.8 Avoiding propagation of masked data +Operations such as insert or update into a table with active column access control can fail if the input data is masked data. This can happen when data to be inserted or updated contains the masked value as a result of a SELECT from a table with active column access control. +For example, assume TABLE1 and TABLE2 have active column access control and for insert, selecting data from TABLE2 returns the masked data. The following INSERT returns an error: +INSERT INTO TABLE1 SELECT * FROM TABLE2 +The masked data that is returned from the SELECT * FROM TABLE2 might not be valid input data for TABLE1 because of data type or column check constraint. +There are two ways to prevent this situation from happening: Define a check constraint or create a before trigger. +6.8.1 Check constraint solution +One way to prevent this problem is to define a check constraint. +As part of RCAC, new SQL syntax is provided to allow an action to be performed when a violation of the check constraints check condition occurs instead of giving that error. However, if the check condition is still not met after the action, a hard error is returned. A check constraint with the new on-violation-clause is allowed on both the CREATE TABLE and ALTER TABLE statements. +In the Example 6-4, the mask is defined to return a value of 'XXX-XX-nnnn' for any query that is not done by a user profile in the DBMGR group. The constraint checks that the column SSN does not have the masked value. 
+Example 6-4 Check constraint to avoid masked data +CREATE SCHEMA MY_LIB SET SCHEMA MY_LIB CREATE TABLE MY_LIB.EMP_INFO (COL1_name CHAR(10) WITH DEFAULT 'DEFAULT', COL2_ssn CHAR(11) WITH DEFAULT 'DEFAULT') CREATE MASK MASK_ssn ON MY_LIB.EMP_INFO FOR COLUMN COL2_ssn RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBMGR' ) = 1 THEN COL2_ssn +ELSE 'XXX-XX-'||SUBSTR(COL2_ssn,8,4) END ENABLE | /* Check constraint for the update and insert.*/ ALTER TABLE MY_LIB.EMP_INFO ADD CONSTRAINT MASK_ssn_preserve CHECK(SUBSTR(COL2_ssn,1,7)<>'XXX-XX-') -- Allow any value other than the mask ON UPDATE VIOLATION PRESERVE COL2_ssn -- Don't update the mask portion of the existing value ON INSERT VIOLATION SET COL2_ssn = DEFAULT -- for insert set this to the default value. +6.8.2 Before trigger solution +The actions that are described in Example 6-4 on page 108 for ON UPDATE VIOLATION and ON INSERT VIOLATION also can be handled by a before trigger, as shown in Example 6-5. +Example 6-5 Before trigger to avoid masked data +CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END +6.9 Triggers and functions (SECURED) +There are some considerations that must be considered when there are triggers and functions on tables that have RCAC enabled. The purpose of SECURE for triggers and functions is so that a user who is allowed to create a trigger or function is not necessarily able to make it SECURE themselves. This prevents the trigger/function developer from adding code that skims off data that they are not allowed to see. +6.9.1 Triggers +Triggers have access to the data in rows outside of the row permission or column masking. An after trigger has access to the new row image after the permission has allowed the update or insert to occur. 
Therefore, the triggers can potentially change the insert or update image value so that it violates the permission. +Any triggers that are defined on a table must be created with an attribute that designates that it is SECURED when RCAC definitions are created or altered for that table, as shown in Example 6-6. The same applies to a view that has an instead of trigger. That trigger must be secure at the point RCAC is enabled for any of the underlying tables the view is over. +Example 6-6 Trigger SECURED +/* Trigger created with the SECURED attribute */ CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END +6.9.2 Functions +Within a CREATE PERMISSION or CREATE MASK , a function can be called. Because that UDF has access to the data before the RCAC rules are applied, the SECURE attribute is required on that function, as shown in Example 6-7. +Example 6-7 Specifying SECURED on a function +CREATE PERMISSION SCHEMA.PERM1 ON SCHEMA.TABLE1 FOR ROWS WHERE MY_UDF(CURRENT_USER,COLUMN1) = 1 ENFORCED FOR ALL ACCESS ENABLE; CREATE FUNCTION MY_UDF (INP1 CHAR(32), INP2 INTEGER) Returns INTEGER LANGUAGE SQL CONTAINS SQL SECURED +The SECURED attribute of MY_UDF signifies that the function is considered secure for RCAC. If a function is called from an SQL statement, and references a column in a table that has RCAC, it must be declared as secure. In that case, if the secure function calls other functions, they are not validated to confirm whether they are secure. +Consider the following examples: +GLYPH Table1 has RCAC defined and enabled. SELECT MY_UDF2(Column2) from schema.table1. MY_UDF2 must be created with the SECURED attribute. If MY_UDF2 invokes MY_UDF3, there is no checking to ensure that it is also created with SECURED. 
NOT SECURED is the default on the create function unless SECURED is explicitly selected. +This same rule applies for any function that might be invoked with a masked column specified as an argument. +GLYPH Table2 column SSN has a column mask that is defined on it. SELECT MY_UDF4(SSN) from table2. Because SSN has a column mask that is defined, MY_UDF4 must be created with the SECURED attribute. +6.10 RCAC is only one part of the solution +When designing and implementing RCAC row permissions, special attention should be given to the effectiveness and limitations of controlling data access. Data can be housed in objects other than tables or physical files. The role and responsibility of the database user, for example, the database engineer, must be reconciled with their respective authority and access privileges. +Figure 6-25 illustrates that object level security is the first check and that RCAC permissions provide control only on tables and physical files. +Figure 6-25 Object-level security and RCAC permissions +
+ +Figure 6-25 Object-level security and RCAC permissions +
+To get access to the table and the rows, the user must pass the object level authority test and the RCAC permission test. +The IBM i journal captures the transactional data and places an image of the row in the journal receiver. If the user has authority to the journal receiver, the row image can be viewed. +Although the SQL Plan Cache data, the SQL Plan Cache Snapshot data, and the SQL Performance Monitor data do not reveal the results of queries, they can show the literal values that are passed along with the SQL statements. +The ability to monitor, analyze, debug, and tune data-centric applications effectively and efficiently requires some understanding of the underlying data, or at least the attributes of the data. The organization must be willing to reconcile the conflicting requirements of "restricting access to data", and "needing access to data". +
+ +
+Chapter 7. +7 +Row and Column Access Control management +After Row and Column Access Control (RCAC) definitions are defined and activated in a database, your management processes must be adjusted to accommodate these new security controls. This chapter highlights some of the changes that should be considered. +The following topics are covered in this chapter: +GLYPH Managing row permissions and column masks +GLYPH Managing tables with row permissions and column masks +GLYPH Monitoring and auditing function usage +7.1 Managing row permissions and column masks +This section focuses on the management of the RCAC row permissions and column masks. +7.1.1 Source management +The SQL statements that are used to define row permissions and column masks should be managed with a change management process. Ideally, you already are using a change management process for your database definitions, and that same process can be extended to cover your RCAC definitions. +If you are using SQL DDL to define your DB2 tables, then you have the option of adding the RCAC definitions to the same source file as the table definition. The benefit of this approach is that it keeps all DDL that is related to a table in a single source file. The downside is that if you must re-create only the RCAC definitions and leave the table unchanged, then you must identify and extract only the RCAC definitions from the source file. There are situations where the row permissions and column masks must be changed or re-created without changing the definition of the associated table. +7.1.2 Modifying definitions +After RCAC is activated for a table, the row permission and column mask definitions can be re-created to change the data access behavior for that table. Usage of the OR REPLACE clause on the CREATE MASK and CREATE PERMISSION SQL statements simplifies the re-creation process by folding in the deletion of the existing RCAC definition. 
+This capability makes it easy to change your RCAC definitions as you test the controls with your applications and identify tweaks that must be made to your RCAC implementation. However, re-creation of RCAC definitions does require an exclusive lock to be acquired on the table during the process. +7.1.3 Turning on and off +As described in 3.1.2, "Enabling and activating RCAC" on page 16, the SQL ALTER statement can turn on and off row permissions and column masks. The ALTER MASK and ALTER PERMISSION statements allow an individual row permission or column mask to be turned off with the DISABLE option and back on with the ENABLE option. The ALTER TABLE statement can deactivate enforcement of all the row permissions and column masks for a single table. +Important: Although these capabilities make it easy to temporarily turn off RCAC security so that you can make environment or application changes, these processes require an exclusive lock to be obtained on a table. Therefore, this activity must be planned carefully to avoid disruptions and outages. +7.1.4 Regenerating +DB2 also can regenerate an existing row permission or column mask. This regenerate option can be useful with more complex RCAC definitions that reference other DB2 objects. +For example, consider a row permission on an ACCOUNTS table (PERMISSION1_ON_ACCOUNTS). The ACCOUNTS table row permission references and compares columns in the CUSTOMERS table. When the definition of the CUSTOMERS table changes, DB2 does not check to determine whether the change to the CUSTOMERS table breaks the ACCOUNTS table row permission. If this table definition change does break the row permission, an error does not surface until an application tries to read rows from the ACCOUNTS table. +Instead of waiting for an application to detect this error, the REGENERATE option can be used on the ACCOUNTS row permission. 
The REGENERATE option returns an error if the change in the CUSTOMERS table definition causes the row permission to be invalid. In this way, the row permission can be proactively corrected before an application discovers the error. +7.2 Managing tables with row permissions and column masks +This section examines the object management considerations after RCAC is added to a DB2 table. +7.2.1 Save and restore +Row permissions and column masks are stored in the DB2 table object itself, so they are automatically saved and restored when the DB2 table object is saved and restored. Therefore, no adjustments must be made to your database backup process to accommodate RCAC. +Save and restore processing works fine with RCAC if the RCAC definition does not reference other DB2 objects other than the table over which they are defined. When the RCAC definition has dependencies on other DB2 objects, the restore process is much more challenging. +For example, assume that the BANKSCHEMA library (which is the system name or short name for the schema long name of BANK_SCHEMA) is saved and restored into a library named BANK_TEST. Recall from the example in 7.1.4, "Regenerating" on page 114 that the row permission on the ACCOUNTS table references the CUSTOMERS table (… SELECT C.CUSTOMER_ID FROM CUSTOMERS C …). After the restore operation, the ACCOUNTS row permission still references the CUSTOMERS table in BANK_SCHEMA because DB2 explicitly qualifies all object references when the row permission or column mask is created. The restore processing does not change the explicit qualification from BANK_SCHEMA to BANK_TEST. As a result, the restored ACCOUNTS row permission now depends on DB2 objects residing in a different schema, even though it was not created that way originally. For more details, see Figure 7-1. +Figure 7-1 Restoring tables to different schemas +
+ +Figure 7-1 Restoring tables to different schemas +
+The only way to fix this issue is to re-create the row permission or column mask after the restore operation. Re-creation of the row permission or column mask is required only for definitions that reference other DB2 objects, but it is simpler to re-create all of the RCAC definitions instead of a subset. For example, generate the SQL using System i Navigator, clear the "Schema qualify names for objects" and select the "OR REPLACE clause", and then run the generated script. +7.2.2 Table migration +There are several IBM i CL commands, such as Move Object (MOVOBJ), Create Duplicate Object (CRTDUPOBJ), and Copy Library (CPYLIB), which are used to migrate a table from one library to another one. Often, this migration is done to create different versions of the table that can be used for development or testing purposes. +The migration of a table with RCAC has the same challenges as restore processing. If the RCAC definition references other DB2 objects, then IBM i CL commands do not change the schema names that are explicitly qualified by the DB2 internal RCAC processing. +Again, re-creating the row permission or column mask is the only way to fix the issue of references to DB2 objects in other schemas. +7.3 Monitoring and auditing function usage +Establishing proper roles for users, separating duties using function usage IDs, and defining RCAC policies allow you to implement an effective and pervasive data access control scheme. But how do you monitor and audit everyone who is involved in the implementation of that scheme? The answer is to use IBM i journaling. A special journal that is called QAUDJRN, also known as the audit journal, can provide a record and audit trail of many security-relevant events that occur on the system, including RCAC-related events. 
+The tasks and operations of security administrators and database engineers who are collaborating can (and should) be effectively monitored and audited to ensure that the organization's data access control and governance policies are in place and enabled. For example, the database engineers can be involved in designing and developing functions and triggers that must be secured using the SECURE attribute. Otherwise, without properly securing functions and triggers, the RCAC controls can be bypassed. +A new journal entry type of "AX" for journal entry code "T" (audit trail) is now used for RCAC. More information about the journaling of RCAC operations can be found in the following documents: +GLYPH IBM i Version 7.2 Journal Management Guide, found at: +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis.htm?lang=en +GLYPH IBM i Version 7.2 Security Reference Guide, found at: +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.htm?lang=en +
+ +
+Chapter 8. +Designing and planning for success +Although successfully implementing Row and Column Access Control (RCAC) is based on knowledge and skills, designing and planning are fundamental aspects. This chapter describes the need for a deep understanding of the technology, and good design, proper planning, and adequate testing. +The following topics are covered in this chapter: +GLYPH Implementing RCAC with good design and proper planning +GLYPH DB2 for i Center of Excellence +8 +8.1 Implementing RCAC with good design and proper planning +By using RCAC, the row and column data that is returned to the requester can be controlled and governed by a set of data-centric policies that are defined with SQL and implemented within DB2 for i. +RCAC provides fine-grained access control and is complementary to IBM i object-level security. With the new RCAC feature of DB2 for i, the database engineer, in partnership with the data owner and security officer, can ensure that users have access to the data based on their level of authorization and responsibility. +This situation also can include separation of duties, such as allowing the application developers to design and implement the solutions, but restricting them from accessing the production data based on policy. Just because someone writes and owns the program, it does not mean that they have access to all the sensitive data that their program can potentially read. +This paper has described the following pervasive power and advantages of RCAC: +GLYPH Access can be controlled through simple or sophisticated logic. +GLYPH Virtually no application changes are required. +GLYPH The implementation of the access policy is part of the DB2 data access layer. +GLYPH Table data is protected regardless of the interface that is used. +GLYPH No user is inherently exempted from the access control policies. +GLYPH Groups of users can share policies and permissions. 
+A deep understanding of the technology, and proper planning, good design, adequate testing, and monitored deployment are critical for success. This includes the usage of quality assurance testing, and realistic performance and scalability exercises that serve to demonstrate that all of your requirements are being met. As part of the verification process, the usage of in-depth proofs of concepts and proofs of technology are recommended, if not essential. When RCAC is activated, the results of queries can change. Anticipating this change and realizing the effects of RCAC before going live are of the utmost importance. +With the ever-growing value of data, and the vast and varied database technology that is available today, it is crucial to have a person or persons on staff who specialize in data-centric design, development, and deployment. This role and responsibility falls on the database engineer. With the availability of DB2 RCAC, the importance of full-time database engineering has grown. +8.2 DB2 for i Center of Excellence +To further assist you with understanding and implementing RCAC, the DB2 for i Center of Excellence team offers an RCAC education and consulting workshop. In addition to knowledge transfer, a working session allows for a review of your data access control requirements, review of the current environment, solution ideation, and high-level solution design. +If you are interested in engaging with the DB2 for i Center of Excellence, contact Mike Cain at mcain@us.ibm.com . +
+ +
+Appendix A. +
+ +
+Database definitions for the RCAC banking example +This appendix provides the database definitions or DDLs to re-create the Row and Column Access Control (RCAC) scenario that is described in Chapter 4, "Implementing Row and Column Access Control: Banking example" on page 37. The script that is shown in Example A-1 is the DDL script that is used to implement this example. +Example A-1 DDL script to implement the RCAC banking example +/* Database Definitions for RCAC Bank Scenario */ /* Schema */ CREATE SCHEMA BANK_SCHEMA FOR SCHEMA BANKSCHEMA ; /* Global Variable */ CREATE VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID VARCHAR( 30) ; LABEL ON VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID IS 'Customer''s log in value passed by web application' ; /* Tables */ CREATE TABLE BANK_SCHEMA.CUSTOMERS ( CUSTOMER_ID FOR COLUMN CUSTO00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_NAME FOR COLUMN CUSTO00002 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_ADDRESS FOR COLUMN CUSTO00003 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_CITY FOR COLUMN CUSTO00004 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_STATE FOR COLUMN CUSTO00005 CHAR(2) CCSID 37 NOT NULL , CUSTOMER_PHONE FOR COLUMN CUSTO00006 CHAR(10) CCSID 37 NOT NULL , CUSTOMER_EMAIL FOR COLUMN CUSTO00007 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_TAX_ID FOR COLUMN CUSTO00008 CHAR(11) CCSID 37 NOT NULL , CUSTOMER_DRIVERS_LICENSE_NUMBER FOR COLUMN CUSTO00012 CHAR(13) CCSID 37 DEFAULT NULL , CUSTOMER_LOGIN_ID FOR COLUMN CUSTO00009 VARCHAR(30) CCSID 37 DEFAULT NULL , CUSTOMER_SECURITY_QUESTION FOR COLUMN CUSTO00010 VARCHAR(100) CCSID 37 DEFAULT NULL , +CUSTOMER_SECURITY_QUESTION_ANSWER FOR COLUMN CUSTO00011 VARCHAR(100) CCSID 37 DEFAULT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY 
HIDDEN , CONSTRAINT BANK_SCHEMA.CUSTOMER_ID_PK PRIMARY KEY( CUSTOMER_ID ) ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_UK UNIQUE( CUSTOMER_LOGIN_ID ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_DRIVERS_LICENSE_CHECK CHECK( CUSTOMER_DRIVERS_LICENSE_NUMBER <> '*************' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_DRIVERS_LICENSE_NUMBER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_EMAIL_CHECK CHECK( CUSTOMER_EMAIL <> '****@****' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_EMAIL ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_CHECK CHECK( CUSTOMER_LOGIN_ID <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_LOGIN_ID = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_LOGIN_ID ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_CHECK CHECK( CUSTOMER_SECURITY_QUESTION_ANSWER <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_SECURITY_QUESTION_ANSWER = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION_ANSWER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_ANSWER CHECK( CUSTOMER_SECURITY_QUESTION <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_SECURITY_QUESTION = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_TAX_ID_CHECK CHECK( CUSTOMER_TAX_ID <> 'XXX-XX-XXXX' AND SUBSTR ( CUSTOMER_TAX_ID , 1 , 7 ) <> 'XXX-XX-' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_TAX_ID ; CREATE TABLE BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_ID FOR COLUMN CUSTID INTEGER NOT NULL , ACCOUNT_NUMBER FOR COLUMN ACCOUNTNO VARCHAR(50) CCSID 37 NOT NULL , ACCOUNT_NAME FOR COLUMN ACCOUNTNAM CHAR(12) CCSID 37 NOT NULL , ACCOUNT_DATE_OPENED FOR COLUMN OPENDATE DATE DEFAULT CURRENT_DATE , ACCOUNT_DATE_CLOSED 
FOR COLUMN CLOSEDATE DATE DEFAULT NULL , ACCOUNT_CURRENT_BALANCE FOR COLUMN ACCTBAL DECIMAL(11, 2) NOT NULL DEFAULT 0 , INSERT_TIMESTAMP FOR COLUMN INSDATE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDDATE TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.ACCOUNT_ID_PK PRIMARY KEY( ACCOUNT_ID ) ); +ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_CUSTOMER_ID_FK FOREIGN KEY( CUSTOMER_ID ) REFERENCES BANK_SCHEMA.CUSTOMERS ( CUSTO00001 ) ON DELETE RESTRICT ON UPDATE RESTRICT ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_NUMBER_CHECK CHECK( ACCOUNT_NUMBER <> '*****' ) ON UPDATE VIOLATION PRESERVE ACCOUNT_NUMBER ; CREATE TABLE BANK_SCHEMA.TRANSACTIONS FOR SYSTEM NAME TRANS ( TRANSACTION_ID FOR COLUMN TRANS00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), ACCOUNT_ID INTEGER NOT NULL , TRANSACTION_TYPE FOR COLUMN TRANS00002 CHAR(1) CCSID 37 NOT NULL , TRANSACTION_DATE FOR COLUMN TRANS00003 DATE NOT NULL DEFAULT CURRENT_DATE , TRANSACTION_TIME FOR COLUMN TRANS00004 TIME NOT NULL DEFAULT CURRENT_TIME , TRANSACTION_AMOUNT FOR COLUMN TRANS00005 DECIMAL(11, 2) NOT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.TRANSACTION_ID_PK PRIMARY KEY( TRANSACTION_ID ) ) ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ADD CONSTRAINT BANK_SCHEMA.TRANSACTIONS_ACCOUNT_ID_FK FOREIGN KEY( ACCOUNT_ID ) REFERENCES BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID ) ON DELETE RESTRICT ON UPDATE RESTRICT ; /* Permissions and Masks */ CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR ROWS WHERE ( QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ) ) ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_EMAIL_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_EMAIL RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_EMAIL WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_EMAIL ELSE '****@****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_TAX_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_TAX_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 +THEN C . CUSTOMER_TAX_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( C . CUSTOMER_TAX_ID , 8 , 4 ) ) WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_DRIVERS_LICENSE_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_DRIVERS_LICENSE_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER ELSE '*************' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_LOGIN_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_LOGIN_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_LOGIN_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . 
CUSTOMER_LOGIN_ID ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION_ANSWER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION_ANSWER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION_ANSWER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ; +CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR ROWS WHERE ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_ACCOUNT_NUMBER_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR COLUMN ACCOUNT_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN A . ACCOUNT_NUMBER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ; CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_TRANSACTIONS ON BANK_SCHEMA.TRANSACTIONS AS T FOR ROWS WHERE ( QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( T . ACCOUNT_ID IN ( SELECT A . ACCOUNT_ID FROM BANK_SCHEMA . ACCOUNTS A WHERE A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ACTIVATE ROW ACCESS CONTROL ; /* END */ +Related publications +The publications that are listed in this section are considered suitable for a more detailed description of the topics that are covered in this paper. +Other publications +These publications are relevant as further information sources: +GLYPH IBM DB2 for i indexing methods and strategies white paper: +http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i _indexing_methods_strategies +GLYPH IBM i Memo to Users Version 7.2 : +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm +GLYPH IBM i Version 7.2 DB2 for i SQL Reference Guide : +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?l ang=en +GLYPH IBM i Version 7.2 Journal Management Guide : +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis .htm?lang=en +GLYPH IBM i Version 7.2 Security Reference Guide : +http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.h tm?lang=en +Online resources +These websites are relevant as further information sources: +GLYPH Database programming topic of the IBM i 7.2 IBM Knowledge Center: http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?l ang=en +GLYPH Identity Theft Resource Center +http://www.idtheftcenter.org +GLYPH Ponemon Institute +http://www.ponemon.org/ +Help from IBM +IBM Support and downloads ibm.com /support IBM Global Services ibm.com /services +Back cover +Row and Column Access Control Support in IBM DB2 for i +Implement 
roles and separation of duties +Leverage row permissions on the database +Protect columns by defining column masks +This IBM Redpaper publication provides information about the IBM i 7.2 feature of IBM DB2 for i Row and Column Access Control (RCAC). It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment. +This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed. +REDP-5110-00 +
+ +
+
+ +
+INTERNATIONAL TECHNICAL SUPPORT ORGANIZATION +BUILDING TECHNICAL INFORMATION BASED ON PRACTICAL EXPERIENCE +IBM Redbooks are developed by the IBM International Technical Support Organization. Experts from IBM, Customers and Partners from around the world create timely technical information based on realistic scenarios. Specific recommendations are provided to help you implement IT solutions more effectively in your environment. +For more information: ibm.com /redbooks +
\ No newline at end of file diff --git a/tests/data/redp5110.json b/tests/data/redp5110.json index 646de0ce..d7b9230b 100644 --- a/tests/data/redp5110.json +++ b/tests/data/redp5110.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "redp5110.pdf", "filename-prov": null, "document-hash": "3f8b6f0cb6d21ff16bdd7254c47ba72984b7ed1b70114e833c30f19be5366ad6", "#-pages": 146, "collection-name": null, "description": null, "page-hashes": [{"hash": "042dcdd712c3671577114114227f75ce1b5fe22a78e589c60b27d3c414ca914e", "model": "default", "page": 1}, {"hash": "19c7033f317f569819298dcaf98d4fd119632b01b323f3e244b6c14cd46b27b0", "model": "default", "page": 2}, {"hash": "1650a40ffe39a2240d05bdf5a7297a9e7de9c2564373213b732eb2009de23fd5", "model": "default", "page": 3}, {"hash": "fd0e00135169f317b2e2ab993cc64383dca2511f4a9e954563050a69dbefc35f", "model": "default", "page": 4}, {"hash": "dd607eefa7f279633dce503515463003c0167d6e1480e41daf39d95a03b02156", "model": "default", "page": 5}, {"hash": "69724844504d443f2f7dabc9d6cc912e26f1aba1fc51ddb2f248aa6f8da70505", "model": "default", "page": 6}, {"hash": "3ca620d960ef23d3419b3de71eb985eaa9bd54b7c1463116d4d11f64ab6515a8", "model": "default", "page": 7}, {"hash": "f360d9c1a29f5d9cc38f7a149b5e82ae9c177dedf534141f5d96d41792ccca01", "model": "default", "page": 8}, {"hash": "aaee7dcc87c982f44b3311ea587d9fee5d510de9567f84832e8b2effbf5e4c49", "model": "default", "page": 9}, {"hash": "f54ad5009578acd50e29ddf9e764f3894aef129245709bdda6695aca35080ef1", "model": "default", "page": 10}, {"hash": 
"35f70e10a2408e0395dfa9e894c5173186ac4481f414e41666e0be54f194accd", "model": "default", "page": 11}, {"hash": "64e97a3d553d9443178aae195f16f327cf503bb9c6930fe13af66b9fed277578", "model": "default", "page": 12}, {"hash": "995809366f67a29d338e5d08064a21a5bcda880bb0fe9d31085a3361059cf9ca", "model": "default", "page": 13}, {"hash": "b33a9cb89864b8461e994bc178c0f348722a75445a176a0ff059a1f1c6013c38", "model": "default", "page": 14}, {"hash": "37b17e27e1e6d405ed9c79a1282703930b1e8e1bff6b849a19ce614e5f874577", "model": "default", "page": 15}, {"hash": "ed6d8cc30effd85fb3a8b189732a80dd1d56dbc7fa4f079cd6d16f6084f4545a", "model": "default", "page": 16}, {"hash": "a355435891596f80e1ea7f3feef6b93a4f82caf62044e09a86e9ce2e02236715", "model": "default", "page": 17}, {"hash": "1d071bfa86d2d97bc7251f5f837deb4b3b72f422b79f76a83457210d40125b2a", "model": "default", "page": 18}, {"hash": "a74e58c9cd8ff01b37e4fe7df505cf495b9c1892db449b93e9076bb71fbd2ef2", "model": "default", "page": 19}, {"hash": "e83cbcc9e475190599ffc079b9266548d97fe0de76a0cb33c9fd50ef25237242", "model": "default", "page": 20}, {"hash": "c52304c295fd7f20396f82ab2bad8f0a085f067afc5692772fb9391ea880bcde", "model": "default", "page": 21}, {"hash": "86497e2615bb82251139e933e8e64153814e4ba46a499195083de8da6f5b89f9", "model": "default", "page": 22}, {"hash": "925398aa64327096c129a383e4bbec2eb083163878227c2d4e3166b44207fc03", "model": "default", "page": 23}, {"hash": "9d4e3d06a5f05410069b2b9486ec876c0e749fc8287c5d2c89940f4c44af96b5", "model": "default", "page": 24}, {"hash": "3956d5e714edf8547117687948339cc61c0727eaea2e2ad3b81e87963c1b73f0", "model": "default", "page": 25}, {"hash": "0bb0e09bd6e39cfc3da30376daecd1ad025ac38727078fd57ed04ab76e6dc8f3", "model": "default", "page": 26}, {"hash": "45005581d511136999fbc537f9465bb0b068b312ece0b9dcffe8f47a2af795fd", "model": "default", "page": 27}, {"hash": "4250019942cd107c8068cdf7c0c40c32f1735b6cd39e83eebd6b88f15f7af945", "model": "default", "page": 28}, {"hash": 
"d932d7afb19cda22b09acd96262695d080061df5f6f61323bbf3151b44707b0f", "model": "default", "page": 29}, {"hash": "bf6eb386ea506279669df237b54e8d789fa70b12d2830a42649632e5b057343f", "model": "default", "page": 30}, {"hash": "5dea54e30c89afe307a397ed24e083324991a1ddb17b94119f149183c1592cd7", "model": "default", "page": 31}, {"hash": "40fac6dd979f00f24fdcd1f07afad352b233f6926b8dfc8315e47c5304df1009", "model": "default", "page": 32}, {"hash": "40378b24c9b151d146ccd959a701dddfc8d9bac79a2075706c34d22dc185afd1", "model": "default", "page": 33}, {"hash": "935989acb8f1108365160d6428516b2b5cca95e12c75fb33818a33ad20730014", "model": "default", "page": 34}, {"hash": "570c8b11193a5b9e26d2b5a680c137cc6acbbb3c4c8dbfd02e96410f67444fab", "model": "default", "page": 35}, {"hash": "9f21fc6a00cee78376ee9fc31eb93ae5f0cde918f78b361f1ff0d2a1db7dfc01", "model": "default", "page": 36}, {"hash": "0e68f946bcdf7f573d88eed366216b5ba0ed470fcab1a783bcfb894802bf284e", "model": "default", "page": 37}, {"hash": "6ca7e5139b0a1993e0dd093698a9df1c1201091e509ec715d25c871c05a0863e", "model": "default", "page": 38}, {"hash": "c441267b99ad21ec04958ba35dcd465ce775b2c51c03ba67a4cfbb76f9955907", "model": "default", "page": 39}, {"hash": "aa16dbe8fa7fcd0634cf4930aa82a13c4f2d8621e759cec9c3097c15975551d2", "model": "default", "page": 40}, {"hash": "a1994f1ff203311afdc2424fedfad6f0429ccefb39ef62f7107ff75934404093", "model": "default", "page": 41}, {"hash": "92f8bad908b6a17adb727f822d8f77b673f79db90763faa32a648d89de97a0ae", "model": "default", "page": 42}, {"hash": "7cde568961d0f4ab1186b75a8d4f024a56b5065814f2050e7deda89fcb940064", "model": "default", "page": 43}, {"hash": "2d6e9fa06bae3a81449a646b629af6332dfc5780e5787e89a1eb491e60a8b95f", "model": "default", "page": 44}, {"hash": "c3c1468d8e9bbca1ac57cb97b7d6e191e3138cd98c919473a3deab89982d46fa", "model": "default", "page": 45}, {"hash": "3efc7b8e4918efef458011a9d564a062ba25e10f1b1998db385c746404995af2", "model": "default", "page": 46}, {"hash": 
"c96cd910329a52e1c256c61bafef7551e838ffe55cfc8de60ab8d1770a614d2a", "model": "default", "page": 47}, {"hash": "ed43a8e94b831c81406d263c7e72cb18279ff682bf82ca21d26bc8eaf58939b7", "model": "default", "page": 48}, {"hash": "beaba63670852ef3937e53edfd9c65e8381ccad289cf377ea1819ed4499649a5", "model": "default", "page": 49}, {"hash": "029387a73b937661bd354c45643d77243aae30a9e1dd692c26cadab54b33f630", "model": "default", "page": 50}, {"hash": "96cee9e611cde6da9b28630ae44aa4dddfb372bec1ad1400a4e5e0c641c18e9b", "model": "default", "page": 51}, {"hash": "d5f7a2c44833429eec81845b03adc589ed3fa9dbacfb90cbe3ac733cfb86306c", "model": "default", "page": 52}, {"hash": "0e398142d223dfaf46ad1d76702b89aa208b23fdc9f5fb7aaba1472a9db53b7b", "model": "default", "page": 53}, {"hash": "59664e9cadd6da670dd867311b1c5d9789cd944186e8ff42375b9719ddc43cf9", "model": "default", "page": 54}, {"hash": "5e4e6eaeafaf43a18590db6079f775401f7689d694cda14516fb000f7d85885c", "model": "default", "page": 55}, {"hash": "68496b0fe32a5149c0d6e70fef47ac02544a1db8176b6fa31c2c4bc59b35f933", "model": "default", "page": 56}, {"hash": "ac1bffe2a57f4b9f610dac9745f85bf8029c04e6279bae1fd942b030ca7e3635", "model": "default", "page": 57}, {"hash": "42616e9b91f856e761cf994d852d7c913e50b2fc00ce04e71cd28d51a4c88bf1", "model": "default", "page": 58}, {"hash": "4e9917d93adf25e36c0eeb37beb7881df8d8de40b23fdcde3f8c35e8867b4f7b", "model": "default", "page": 59}, {"hash": "7a484f738feda7e2327ce3bae87e5989b008d1309008f5fc237a681be7b4780c", "model": "default", "page": 60}, {"hash": "2957be6c48ca15c71ae2d63191e3ec999a65771e444c197828a2efe54aad7dee", "model": "default", "page": 61}, {"hash": "81d885ff0652b16f490f2bdf49bf5b2f85bdea4ea7dc85f98de238b437812522", "model": "default", "page": 62}, {"hash": "c0a9752603b861a7c13d678d1c89174f140ae5ef1fc4af32a872ae99bd09b494", "model": "default", "page": 63}, {"hash": "9fa129577bad65520977b6742108edd287a8413c1f002a0fcde9e8d4649e5ca3", "model": "default", "page": 64}, {"hash": 
"720722b50e586615b5a55451ec49b89048aecbb7450b7bf952ab7b8cab856b63", "model": "default", "page": 65}, {"hash": "91c76d552d29f2d09c34608319dd7729bd1309ccfadd56f22a00d25e8bbce771", "model": "default", "page": 66}, {"hash": "d9a6a973665fd160fb9cf52d6444cd4be6bf5a977666b625f58858ba507b0ee2", "model": "default", "page": 67}, {"hash": "dcc11d3809231dfdbe15f28126c3c6c7016f0d239c48829860133e645f0b4e9e", "model": "default", "page": 68}, {"hash": "18f5746455a39ff66f0d83bf5dcc45151e5313ccf038da38b25195a135445d23", "model": "default", "page": 69}, {"hash": "6f150521a19ebcc1dc711a861d26a1447ee33c01d770b6e985ed23ac4c3bce0b", "model": "default", "page": 70}, {"hash": "2675ed680861667ca9a8eb01fffa6b1ffc5c682d1217a7ee211ee1a14f066301", "model": "default", "page": 71}, {"hash": "cc1b3ad555bc13b0266cc1dd1646f6703b96043a17865254191fb28200897100", "model": "default", "page": 72}, {"hash": "d69dc0543126dbc6d00e1e8ce512bbf99efcda00f45cae9ab93877fc9e833308", "model": "default", "page": 73}, {"hash": "3afbdd3081b903b7941e16a1b3e0feebb23b70fa6a850e3b1119172763263fdb", "model": "default", "page": 74}, {"hash": "9ab6f9e4fd7c147650dbf4b3226a4805d3e3a86af0be0496be4cbd7eb2fe38dc", "model": "default", "page": 75}, {"hash": "3cd1d3fe8ed3a77aeaf1b68c9faa81fdc1209f44b20dd695826bfb009497af91", "model": "default", "page": 76}, {"hash": "3e0d46cb61ec6ec6ba1aa5f21e61d8988b7c531c3928c1cfa2ea5a35c5f7556f", "model": "default", "page": 77}, {"hash": "1d2d26c6366591fa7103e6920121f20b7d47e252f8e5598bc9b0d10d88b0a876", "model": "default", "page": 78}, {"hash": "6b74896cf6d9d79d6eea588138972973314a1e883e4a92eb39533e096e5fea4c", "model": "default", "page": 79}, {"hash": "2b53410a79b04ddd9d95ca46742e1916b631d56c91e67426449a2f48303233c9", "model": "default", "page": 80}, {"hash": "1cad2f44f63e2c43c0950ba8863f3a3d0f2f4afa1ae6f9ca2ceb992a34061d98", "model": "default", "page": 81}, {"hash": "1fd53dcb8bd415d94cbebe26f4938b10551f29603658e5d92b9932d2179878ba", "model": "default", "page": 82}, {"hash": 
"4ef9b11fb0f67f1227d7241f38a68b1e7d12cccb90802424b6fc139e84e73241", "model": "default", "page": 83}, {"hash": "1c2ea11640d6d0298f383f42acc541cee1d082453dc6c201fbd0dfe2c3583a6d", "model": "default", "page": 84}, {"hash": "fe89905acb289f8126f56f0fa57b0032cf459757a285a28e18a4fa79d0f37ff5", "model": "default", "page": 85}, {"hash": "897bc2fcbbd0147b2ad32d7130836346100dd1f483bb904be454bddee79032d3", "model": "default", "page": 86}, {"hash": "c8e638b82bad37d6d6528852ca8f58d16aa6de3ae113f9f59cc061591bbe36d4", "model": "default", "page": 87}, {"hash": "c8b4dcf9ac58518dfd7a0030612750ef310992ecfa1352cc501a3183eddc63ac", "model": "default", "page": 88}, {"hash": "311e4dab810a715c0dd964b03c57ef59105b844638789454a5a31285bb20b6c5", "model": "default", "page": 89}, {"hash": "bcc127d2a49aaeb213cddec0bef6623f19a01d5ea42b6f7495b4f803405c42f6", "model": "default", "page": 90}, {"hash": "bb0ab5360776e0488e57ac48e39d6e0df6200c2570723dcb807ad3f679c09534", "model": "default", "page": 91}, {"hash": "6fd7cdacf0d19eda989b99c3b1e02ef6d6643dbc6cfa6f10037bd0ebb7cd10b5", "model": "default", "page": 92}, {"hash": "d33f0c4ae60d66663fa25b1f7675c11437badaa8a8fa7e51daeebc6141df12ed", "model": "default", "page": 93}, {"hash": "315310a543a8ecc45c434d0e0b8aa54c6566d53d61acb74820a6649e583f9cb2", "model": "default", "page": 94}, {"hash": "38d412966dfe997ab9448d2df046448e5ebbedd2531b8527bd744c8bb5440508", "model": "default", "page": 95}, {"hash": "08d37d1668223a1a7194cf811cd594cfe30e422dd1695df02a8b73a7b735084b", "model": "default", "page": 96}, {"hash": "31d9ea5f81342dbfdc72492243a2e7f0aa9817d84d61eab0181aeaa71d75d7f5", "model": "default", "page": 97}, {"hash": "1cb53ff64bc87e1939f8b45a89a00a6267a02e718ec0c634cf7e20936ffdd4f2", "model": "default", "page": 98}, {"hash": "1245402b982e1a9d1065ac0c0cad30336aa14ecdc2cb3ef4a5c36bc55e9bbd10", "model": "default", "page": 99}, {"hash": "c38f21714819257f54186f075bf6b9446113e03dd6d40e5fd1319fd5cd3c359c", "model": "default", "page": 100}, {"hash": 
"9bb82caef77080aa11554e67ab1f214e5cf5e8fe2415663d128ba541cf314d5b", "model": "default", "page": 101}, {"hash": "714f390df026d13c65dea02894cf3d91496fd2ae3a94073d90f7714df79d47ee", "model": "default", "page": 102}, {"hash": "f19f8a6e418fdf2a42d8ede7c788f9f8cf33b907e3bb606e9c829320dff3bb5f", "model": "default", "page": 103}, {"hash": "2b15ecb09a734a16ed9804314a6cc9f03a12af63a904fac62a97ea21b1d2ecef", "model": "default", "page": 104}, {"hash": "8b15d46f01007cf63e5bad57b8cd889275c11e6b58bebe48ffec8842d67e7277", "model": "default", "page": 105}, {"hash": "f20a188209524e8fd1692faa3d3450cd075bb45f2962693371867cf166456dc1", "model": "default", "page": 106}, {"hash": "2d4dbf9c96c18bffaeb3b1bd321acea187066e968dd034c585a81a547f4c93c1", "model": "default", "page": 107}, {"hash": "0ef40f53d56676acaf1aef17676d06262391f04c8277eb1ba32ab7ca5d97e875", "model": "default", "page": 108}, {"hash": "25dbff770b7e10a2a2e2668b2f2977d99ed53ed37d3390e1f89d9245abf83e72", "model": "default", "page": 109}, {"hash": "2572c0b17f240729b504355e11e0d2009a92925a1faaa7b66aea649dc59d7905", "model": "default", "page": 110}, {"hash": "a3e79679ca89ec169e9967808ff8b3f9c2c2db25c113cb68c3f3a993eef15408", "model": "default", "page": 111}, {"hash": "5a47310eb886fad70101ea30ef05dee49cbda1d8a7e2446c3c61b66b3f634039", "model": "default", "page": 112}, {"hash": "992b747ebf8d366fcc11d36599c33ed004584f000855942db59e5a30dd625c7c", "model": "default", "page": 113}, {"hash": "f0bb099090288d2d8c2dad45a22598a924b5c8c3b739206496022a8985d56e25", "model": "default", "page": 114}, {"hash": "5d4e2ca3c369a87ae1732a86f0553fe650005db4637a792963f02fee28a3f1dd", "model": "default", "page": 115}, {"hash": "d23e9d367ce0fa476a6c89009c6fc6c8dd8e15dac6c21b1457a87c8ea89fc6ab", "model": "default", "page": 116}, {"hash": "2ed8bcad41539c0196738efdced854e4c0c11736a062c2bb382517307308315f", "model": "default", "page": 117}, {"hash": "a6d6fd7589a6dddaea1ae0ee683f34ba67d229ad1489d43cd55ab4bfa0a09e48", "model": "default", "page": 118}, {"hash": 
"4373bdfba2b9cb9f431054a081bcdbb9fde02a2a7c555237105645fc7c4300c6", "model": "default", "page": 119}, {"hash": "b9ba9a2d9c6e8fae2ae668710eb75f4e32a1debfca93371c7d2b12c849bd22da", "model": "default", "page": 120}, {"hash": "f0ac55799e80466c2f68c00232e96f16c893b304c5af92380071564bfd79cc2f", "model": "default", "page": 121}, {"hash": "a619cca5375467d6cbf87c25836da41e5a09dcab342c685b34539dd82fe86989", "model": "default", "page": 122}, {"hash": "d5eb13189c1badbc8317352c3077a84871640f1c42ba8d544f2b66e9788940b4", "model": "default", "page": 123}, {"hash": "5328248231376143041b9f94792b736e39d597c55126949b59362f6464ea0a04", "model": "default", "page": 124}, {"hash": "5201845b41de7b7c02c15934aa48093d9c3b7dd783a32f1f6887d16ab27736fd", "model": "default", "page": 125}, {"hash": "53ef8bd7beea5d3619cc02586077a54911c327d5b912872da834d7e26cbddda7", "model": "default", "page": 126}, {"hash": "eb5a30dbe63c79925f80db77000a9ae325904111ec3a76d12f0eabe9ea8184b5", "model": "default", "page": 127}, {"hash": "ea0c7446fc6d2d362e73d4581e7b8ad4608d1a569eaf7728b2565e9a62bfacc2", "model": "default", "page": 128}, {"hash": "ce7040d1ddf6c4ad312a07c56ce385cc338cb6dad98a350a3145fa651df24e10", "model": "default", "page": 129}, {"hash": "a59661e9111d2f306b39d51a1d1c2b60fafa5a0053a15e5c4df080974b4b9c8e", "model": "default", "page": 130}, {"hash": "e0eebbd57c73414b07cd40507f8b0dc3e30b7621a4da103a1b11b98178d614da", "model": "default", "page": 131}, {"hash": "663d5c537942f854d04a288e7cddc273cb931a1671b07345cf6fbd87593e6960", "model": "default", "page": 132}, {"hash": "ee15d566c88e74395f5c9cf500a25235527c226a22ac85bd940113a29690fcd3", "model": "default", "page": 133}, {"hash": "16dcf411e2a595080c73aa2c3aac658c7ea34947642e9f5d74b30637a8232ba0", "model": "default", "page": 134}, {"hash": "d06b834379d4d7edede6ad45cab9324d8ed03f6553a6ace9eef8ee2911517eae", "model": "default", "page": 135}, {"hash": "f39abd05ea9ae74cdd31f3fe7fc2cafb94364c90ff8f85b38fd763e0b4f00492", "model": "default", "page": 136}, {"hash": 
"c8cc8d0266caeb8d3547582e443238d020cc2b89b9b0a27881fa53a2d53eb373", "model": "default", "page": 137}, {"hash": "5df7c7769a47c31ede50376223cd8c64a630f146185eabfd69e6def4904d11e9", "model": "default", "page": 138}, {"hash": "752a8ff175ffefd5467eb28072d1ae016e4f2d121a42de192874c1314d8782af", "model": "default", "page": 139}, {"hash": "80196ef5402921f88f9a620eecc70cd40660a88bc53f0d7b41932ef750af8cf8", "model": "default", "page": 140}, {"hash": "e0675b1f0bfe007f57df25c89b6606a7fb711a9a2aea0b6ab3ed7f0c344938d9", "model": "default", "page": 141}, {"hash": "34c60aca3232bf01b5bcc0d4f745ecba5742a056e7cd56e78e733d27165319f5", "model": "default", "page": 142}, {"hash": "8add7158d438c17581bf11a58d377832b87438adddd357fc1df9627a01bb050c", "model": "default", "page": 143}, {"hash": "c6bfbf013724102c875b7177a50d9eeebd48325dc2c1ff163e018a5d86b4b638", "model": "default", "page": 144}, {"hash": "6272edb80b7baf8c345cdc69fd8b613712da5cca430baeee8b2bf74383b20940", "model": "default", "page": 145}, {"hash": "637ac3e09c925390e82504f989601641999e308491f5cd0cd8db2a22021a5412", "model": "default", "page": 146}]}, "main-text": [{"text": "Front cover", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [287.82000732421875, 741.251953125, 418.83355712890625, 763.4519653320312], "page": 1, "span": [0, 11], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [35.70000076293945, 625.8219604492188, 584.6428833007812, 709.2680053710938], "page": 1, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"text": "ibm.com /redbooks", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [36.900001525878906, 26.895000457763672, 164.45849609375, 42.13602828979492], "page": 1, "span": [0, 17], "__ref_s3_data": null}]}, {"name": 
"Picture", "type": "figure", "$ref": "#/figures/2"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"text": "International Technical Support Organization", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [191.8931884765625, 706.8230590820312, 468.1595153808594, 720.9096069335938], "page": 3, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [191.5712432861328, 659.2655639648438, 551.7711181640625, 688.3182373046875], "page": 3, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "November 2014", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [191.92127990722656, 629.265869140625, 290.98956298828125, 642.7371215820312], "page": 3, "span": [0, 13], "__ref_s3_data": null}]}, {"text": "REDP-5110-00", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [479.2291259765625, 27.93828010559082, 547.263671875, 38.04776382446289], "page": 3, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "Note: Before using this information and the product it supports, read the information in \"Notices\" on page vii.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [70.37338256835938, 680.7003173828125, 511.2250671386719, 703.3181762695312], "page": 4, "span": [0, 111], "__ref_s3_data": null}]}, {"text": "First Edition (November 2014)", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.45094299316406, 96.07437896728516, 206.09754943847656, 106.79737091064453], "page": 4, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "This edition applies to Version 7, Release 2 of IBM i (product number 5770-SS1).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.08177947998047, 73.64718627929688, 422.2424621582031, 83.91992950439453], "page": 4, "span": [0, 80], "__ref_s3_data": 
null}]}, {"text": "' Copyright International Business Machines Corporation 2014. All rights reserved.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [63.635929107666016, 44.85982894897461, 426.39117431640625, 54.95832443237305], "page": 4, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "Note to U.S. Government Users Restricted Rights -- Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.18267822265625, 23.176387786865234, 547.2008666992188, 43.96644592285156], "page": 4, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "Contents", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 695.9519653320312, 168.73440551757812, 718.7908325195312], "page": 5, "span": [0, 8], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.926761627197266, 27.811120986938477, 257.24334716796875, 37.25619888305664], "page": 5, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "iii", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [538.4729614257812, 27.93828010559082, 547.25927734375, 38.0196647644043], "page": 5, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "iv", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.56709289550781, 27.93828010559082, 75.64199829101562, 37.95931625366211], "page": 6, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [90.20014190673828, 27.85855484008789, 331.77874755859375, 37.22001647949219], "page": 6, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"text": "Contents", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [488.2200012207031, 28.136999130249023, 529.1115112304688, 37.02998352050781], "page": 7, "span": [0, 8], "__ref_s3_data": null}]}, {"text": "v", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.4024658203125, 27.93828010559082, 547.3956298828125, 37.15127944946289], "page": 7, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "vi", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.29622650146484, 27.93828010559082, 75.64199829101562, 37.651676177978516], "page": 8, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [90.30646514892578, 27.79586410522461, 331.6808776855469, 37.322059631347656], "page": 8, "span": 
[0, 54], "__ref_s3_data": null}]}, {"text": "Notices", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.5048065185547, 718.7636108398438], "page": 9, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "This information was developed for products and services offered in the U.S.A.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.18147277832031, 649.8180541992188, 413.7007141113281, 660.0758666992188], "page": 9, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.14546966552734, 579.6738891601562, 547.235595703125, 640.0175170898438], "page": 9, "span": [0, 625], "__ref_s3_data": null}]}, {"text": "IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. 
You can send license inquiries, in writing, to:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.0940933227539, 540.159912109375, 547.2992553710938, 570.1964721679688], "page": 9, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "IBM Director of Licensing, IBM Corporation, North Castle Drive, Armonk, NY 10504-1785 U.S.A.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.593505859375, 529.7247314453125, 489.1996154785156, 540.0978393554688], "page": 9, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "The following paragraph does not apply to the United Kingdom or any other country where such provisions are inconsistent with local law: INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some states do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.16057586669922, 459.4730224609375, 547.1917114257812, 520.091796875], "page": 9, "span": [0, 541], "__ref_s3_data": null}]}, {"text": "This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. 
IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.943748474121094, 410.14208984375, 547.2783813476562, 449.93365478515625], "page": 9, "span": [0, 345], "__ref_s3_data": null}]}, {"text": "Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.966217041015625, 369.6625671386719, 539.7974243164062, 400.06964111328125], "page": 9, "span": [0, 286], "__ref_s3_data": null}]}, {"text": "IBM may use or distribute any of the information you supply in any way it believes appropriate without incurring any obligation to you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.32443237304688, 339.65264892578125, 547.1986694335938, 360.1954650878906], "page": 9, "span": [0, 135], "__ref_s3_data": null}]}, {"text": "Any performance data contained herein was determined in a controlled environment. Therefore, the results obtained in other operating environments may vary significantly. Some measurements may have been made on development-level systems and there is no guarantee that these measurements will be the same on generally available systems. Furthermore, some measurements may have been estimated through extrapolation. Actual results may vary. 
Users of this document should verify the applicable data for their specific environment.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.14064025878906, 269.77093505859375, 544.1587524414062, 329.7679443359375], "page": 9, "span": [0, 526], "__ref_s3_data": null}]}, {"text": "Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.13702392578125, 219.69473266601562, 547.231689453125, 259.8896789550781], "page": 9, "span": [0, 408], "__ref_s3_data": null}]}, {"text": "This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to the names and addresses used by an actual business enterprise is entirely coincidental.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.02989196777344, 169.76266479492188, 545.7865600585938, 209.7733154296875], "page": 9, "span": [0, 359], "__ref_s3_data": null}]}, {"text": "COPYRIGHT LICENSE:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.42018127441406, 150.16415405273438, 172.49951171875, 160.39039611816406], "page": 9, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. 
You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.03350067138672, 79.5408706665039, 547.2437744140625, 140.08206176757812], "page": 9, "span": [0, 619], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.92543411254883, 27.7843074798584, 257.24334716796875, 37.34343719482422], "page": 9, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "vii", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.465576171875, 27.93828010559082, 547.250244140625, 37.77464294433594], "page": 9, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Trademarks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.19252014160156, 706.0162963867188, 154.14569091796875, 721.5706787109375], "page": 10, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation in the United States, other countries, or both. These and other IBM trademarked terms are marked on their first occurrence in this information with the appropriate symbol (fi or \u2122), indicating US registered or common law trademarks owned by IBM at the time this information was published. Such trademarks may also be registered or common law trademarks in other countries. 
A current list of IBM trademarks is available on the Web at http://www.ibm.com/legal/copytrade.shtml", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.04251861572266, 629.2591552734375, 547.2604370117188, 689.3146362304688], "page": 10, "span": [0, 591], "__ref_s3_data": null}]}, {"text": "The following terms are trademarks of the International Business Machines Corporation in the United States, other countries, or both:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.07420349121094, 599.2596435546875, 546.6150512695312, 619.2008666992188], "page": 10, "span": [0, 133], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"text": "The following terms are trademarks of other companies:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.15382385253906, 537.2783203125, 311.9006652832031, 547.204833984375], "page": 10, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Windows, and the Windows logo are trademarks of Microsoft Corporation in the United States, other countries, or both.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.90792465209961, 507.27880859375, 509.53704833984375, 527.1090698242188], "page": 10, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "Other company, product, or service names may be trademarks or service marks of others.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.3842544555664, 486.98126220703125, 464.51568603515625, 497.27496337890625], "page": 10, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "viii", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.940345764160156, 26.91827964782715, 81.16200256347656, 36.210243225097656], "page": 10, "span": [0, 4], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": 
[95.68927764892578, 26.413494110107422, 337.0337829589844, 36.1352424621582], "page": 10, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "DB2 for i Center of Excellence", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.80000305175781, 706.416015625, 235.86239624023438, 717.5160522460938], "page": 11, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "Solution Brief IBM Systems Lab Services and Training", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [93.55310821533203, 636.66357421875, 234.06729125976562, 654.3007202148438], "page": 11, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "Highlights", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [144.47474670410156, 454.5254211425781, 188.74681091308594, 464.9404296875], "page": 11, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [144.74562072753906, 433.3105773925781, 242.87388610839844, 447.85009765625], "page": 11, "span": [0, 532], "__ref_s3_data": null}]}, {"text": "GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [144.467529296875, 402.7626953125, 259.22869873046875, 425.5424499511719], "page": 
11, "span": [0, 876], "__ref_s3_data": null}]}, {"text": "GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [144.52346801757812, 379.9961242675781, 249.8356170654297, 394.7245788574219], "page": 11, "span": [0, 672], "__ref_s3_data": null}]}, {"text": "GLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [144.7223358154297, 357.3323669433594, 234.2516326904297, 371.9924011230469], "page": 11, "span": [0, 613], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"text": "Power Services", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [460.6785583496094, 646.5781860351562, 506.2869873046875, 653.638916015625], "page": 11, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "DB2 for i Center of Excellence", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [280.1233215332031, 515.3794555664062, 463.8094177246094, 554.3141479492188], "page": 11, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "Expert help to achieve your business requirements", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [279.622314453125, 503.428466796875, 483.57049560546875, 514.8067626953125], "page": 11, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "We build confident, 
satisfied clients", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [279.9364929199219, 467.1043395996094, 443.2821044921875, 476.8785095214844], "page": 11, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "No one else has the vast consulting experiences, skills sharing and renown service offerings to do what we can do for you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [279.7645568847656, 446.6058044433594, 488.1546630859375, 464.781982421875], "page": 11, "span": [0, 122], "__ref_s3_data": null}]}, {"text": "Because no one else is IBM.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [280.2401123046875, 427.2699890136719, 367.8602294921875, 435.3384704589844], "page": 11, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "With combined experiences and direct access to development groups, we're the experts in IBM DB2\u00ae for i. The DB2 for i Center of Excellence (CoE) can help you achieve-perhaps reexamine and exceed-your business requirements and gain more confidence and satisfaction in IBM product data management products and solutions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [279.7528381347656, 366.48248291015625, 500.4913024902344, 415.5780944824219], "page": 11, "span": [0, 318], "__ref_s3_data": null}]}, {"text": "Who we are, some of what we do", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [279.6987609863281, 345.1319274902344, 435.1271667480469, 354.7207946777344], "page": 11, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "Global CoE engagements cover topics including:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [279.5264587402344, 334.4953918457031, 434.56317138671875, 342.8038024902344], "page": 11, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "r Database performance and scalability", "type": "paragraph", "name": "List-item", "font": null, "prov": 
[{"bbox": [280.1374206542969, 315.2233581542969, 401.9148254394531, 323.6479187011719], "page": 11, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "r Advanced SQL knowledge and skills transfer", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [279.9528503417969, 304.53717041015625, 424.9964599609375, 313.6093444824219], "page": 11, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "r Business intelligence and analytics", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [280.2003479003906, 295.0150451660156, 392.3099060058594, 302.7135314941406], "page": 11, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "r DB2 Web Query", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [280.0979919433594, 284.3857421875, 339.94354248046875, 292.44427490234375], "page": 11, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "r Query/400 modernization for better reporting and analysis capabilities", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [279.9316101074219, 274.4402160644531, 504.1931457519531, 282.7410583496094], "page": 11, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "r Database modernization and re-engineering", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [279.8770751953125, 263.43438720703125, 423.13360595703125, 271.96844482421875], "page": 11, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "r Data-centric architecture and design", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [280.0404968261719, 253.90310668945312, 400.11041259765625, 261.8250732421875], "page": 11, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "r Extremely large database and overcoming limits to growth", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [280.1170959472656, 243.8183135986328, 467.3244323730469, 252.07838439941406], "page": 11, "span": [0, 58], "__ref_s3_data": null}]}, 
{"text": "r ISV education and enablement", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [279.9450988769531, 234.0165557861328, 382.3848876953125, 241.8405303955078], "page": 11, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "What you can expect", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [93.4457015991211, 623.955322265625, 193.95431518554688, 633.6002197265625], "page": 12, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "Depending on the engagement, our team of consultants offer:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [93.44943237304688, 613.8342895507812, 283.61541748046875, 622.0606079101562], "page": 12, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "r Briefings, consulting and guidance on demand", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [93.83878326416016, 593.75537109375, 243.2284698486328, 602.1784057617188], "page": 12, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "r Illumination of the DB2 for i capabilities and leadership to exploit them", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [93.76670837402344, 574.0309448242188, 282.7251281738281, 592.1212158203125], "page": 12, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "r Analysis and remediation of performance and scalability issues caused by inefficient database design and implementation", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [93.70116424560547, 543.4135131835938, 274.4058532714844, 571.626708984375], "page": 12, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "r Configuration of systems, operating system and products to fully leverage database capabilities", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [93.66458129882812, 522.5725708007812, 285.813232421875, 541.2940063476562], "page": 12, "span": [0, 97], "__ref_s3_data": null}]}, {"text": 
"Key client benefits", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [93.60704040527344, 499.23284912109375, 179.39649963378906, 508.8530578613281], "page": 12, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "T Gain greater database and application performance within your current environment. Achieve greater productivity in the development and maintenance of database and applications using modern techniques. Architect and design data structures to accommodate and benefit from business analytics (BA) tools and processes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [93.4247055053711, 438.3083190917969, 282.49176025390625, 503.74200439453125], "page": 12, "span": [0, 316], "__ref_s3_data": null}]}, {"text": "For more information", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [93.71710968017578, 416.9537353515625, 192.12144470214844, 425.96875], "page": 12, "span": [0, 20], "__ref_s3_data": null}]}, {"text": "Pricing depends on the scope of work. Learn more about the DB2 for i Center of Excellence and other related products and services. 
Contact stgls@us.ibm.com or visit:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [93.525146484375, 386.6142883300781, 274.9092102050781, 414.7449035644531], "page": 12, "span": [0, 165], "__ref_s3_data": null}]}, {"text": "ibm.com GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [93.6114730834961, 366.9930725097656, 216.5275421142578, 374.27313232421875], "page": 12, "span": [0, 298], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"text": "\u00a9 Copyright IBM Corporation 2013", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.5734558105469, 575.8102416992188, 409.8566589355469, 583.2079467773438], "page": 12, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "IBM Corporation", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.6012878417969, 561.0743408203125, 358.2181701660156, 567.9740600585938], "page": 12, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "Route 100", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.8481140136719, 553.4358520507812, 338.6964416503906, 559.6707153320312], "page": 12, "span": [0, 9], "__ref_s3_data": null}]}, {"text": "Somers, NY 10589", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.67144775390625, 545.79736328125, 361.2178039550781, 552.7429809570312], "page": 12, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "Produced in the United States of America March 2013", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.4100341796875, 522.8818969726562, 420.7811584472656, 537.3163452148438], "page": 12, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "IBM, the IBM logo, ibm.com, DB2 and Power Systems are trademarks of International Business Machines Corp., registered 
in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the web at \"Copyright and trademark information\" at www.ibm.com/legal/ copytrade.shtml .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.1656494140625, 470.847412109375, 505.15802001953125, 516.7039794921875], "page": 12, "span": [0, 367], "__ref_s3_data": null}]}, {"text": "This document is current as of the initial date of publication and may be changed by IBM at any time.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.3301696777344, 450.2502136230469, 500.2719421386719, 464.6746520996094], "page": 12, "span": [0, 101], "__ref_s3_data": null}]}, {"text": "Not all offerings are available in every country in which IBM operates.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [309.0384826660156, 436.8582763671875, 494.2660827636719, 443.8564758300781], "page": 12, "span": [0, 71], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"text": "Please Recycle", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [333.6021728515625, 421.63079833984375, 375.9269104003906, 428.56036376953125], "page": 12, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "QLS12392-USEN-00", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [446.13677978515625, 118.22988891601562, 505.1431579589844, 125.14271545410156], "page": 12, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "Preface", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.46160888671875, 718.642333984375], "page": 13, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "This IBMfi Redpaper\u2122 publication provides information about the IBM i 7.2 feature of IBM DB2fi for i Row and Column Access Control (RCAC). 
It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77647399902344, 590.1392822265625, 547.3082275390625, 660.1563720703125], "page": 13, "span": [0, 469], "__ref_s3_data": null}]}, {"text": "This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9678192138672, 531.8663330078125, 546.4656982421875, 577.9606323242188], "page": 13, "span": [0, 309], "__ref_s3_data": null}]}, {"text": "This paper was produced by the IBM DB2 for i Center of Excellence team in partnership with the International Technical Support Organization (ITSO), Rochester, Minnesota US.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05535888671875, 449.6070251464844, 547.2366943359375, 472.15496826171875], "page": 13, "span": [0, 172], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.00379180908203, 27.771316528320312, 257.24334716796875, 37.35597229003906], "page": 13, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "xi", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [538.0973510742188, 27.93828010559082, 547.2503051757812, 37.66927719116211], "page": 13, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Jim Bainbridge is a senior DB2 consultant on the DB2 for i Center of Excellence team in the IBM Lab Services and Training organization. His primary role is training and implementation services for IBM DB2 Web Query for i and business analytics. Jim began his career with IBM 30 years ago in the IBM Rochester Development Lab, where he developed cooperative processing products that paired IBM PCs with IBM S/36 and AS/.400 systems. In the years since, Jim has held numerous technical roles, including independent software vendors technical support on a broad range of IBM technologies and products, and supporting customers in the IBM Executive Briefing Center and IBM Project Office.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.83331298828125, 275.1402587890625, 541.2507934570312, 417.39556884765625], "page": 13, "span": [0, 684], "__ref_s3_data": null}]}, {"text": "Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Rochester, Minnesota. He writes extensively and teaches IBM classes worldwide in all areas of DB2 for i. Before joining STG Lab Services, he worked in the ITSO for nine years writing multiple IBM Redbooksfi publications. He also worked for IBM Colombia as an IBM AS/400fi IT Specialist doing presales support for the Andean countries. He has 28 years of experience in the computing field and has taught database classes in Colombian universities. He holds a Master's degree in Computer Science from EAFIT, Colombia. 
His areas of expertise are database technology, performance, and data warehousing. Hernando can be contacted at hbedoya@us.ibm.com .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.7664794921875, 111.162841796875, 541.2737426757812, 265.5693664550781], "page": 13, "span": [0, 726], "__ref_s3_data": null}]}, {"text": "Authors", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.30328369140625, 488.9364013671875, 125.36660766601562, 504.30010986328125], "page": 13, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "xii", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.2465591430664, 27.93828010559082, 78.4020004272461, 37.6812858581543], "page": 14, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39225006103516, 27.678035736083984, 334.4214172363281, 37.328025817871094], "page": 14, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/13"}, {"text": "Rob Bestgen is a member of the DB2 for i Center of Excellence team helping customers use the capabilities of DB2 for i. In addition, Rob is the chief architect of the DB2 SQL Query Engine (SQE) for DB2 for i and is the product development manager for DB2 Web Query for i.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.9754943847656, 657.044921875, 541.2052612304688, 715.2725219726562], "page": 14, "span": [0, 271], "__ref_s3_data": null}]}, {"text": "Mike Cain is a Senior Technical Staff Member within the IBM Systems and Technology Group. 
He is also the founder and team leader of the DB2 for i Center of Excellence in Rochester, Minnesota US. Before his current position, he worked as an IBM AS/400 Systems Engineer and technical consultant. Before joining IBM in 1988, Mike worked as a System/38 programmer and data processing manager for a property and casualty insurance company. Mike has 26 years of experience with IBM, engaging clients and Business Partners around the world. In addition to assisting clients, he uses his knowledge and experience to influence the IBM solution, development, and support processes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.9455871582031, 457.75616455078125, 541.171630859375, 599.965576171875], "page": 14, "span": [0, 671], "__ref_s3_data": null}]}, {"text": "Dan Cruikshank has been an IT Professional since 1972. He has consulted on a number of different project areas since joining IBM Rochester in 1988. Since 1993, Dan was focused primarily on resolving IBM System ifi application and database performance issues at several IBM customer accounts. Since 1998, Dan has been one of the primary instructors for the Database Optimization Workshop. Most recently, Dan is a member of the DB2 for i Center of Excellence team with IBM Rochester Lab Services.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.6852111816406, 341.8028259277344, 541.3219604492188, 447.9212951660156], "page": 14, "span": [0, 494], "__ref_s3_data": null}]}, {"text": "Jim Denton is a senior consultant at the IBM DB2 for i Center of Excellence, where his responsibilities include both teaching courses and hands on consulting. Jim specializes in SQL performance, data-centric programming, and database modernization. Jim started his IBM career in 1981 as an S/38 operating system programmer. 
Before joining the consulting team, his key assignments included 10 years as a systems performance specialist, five years as the lead \"JDE on i\" analyst, three years as a consultant at the IBM Benchmark and Briefing Center in Montpellier France, and a total of 11 years as an operating system developer, including five years designing and implementing enhancements to DB2 for i.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.8884582519531, 187.7853546142578, 541.2412719726562, 330.1932067871094], "page": 14, "span": [0, 702], "__ref_s3_data": null}]}, {"text": "Doug Mack is a DB2 for i and Business Intelligence Consultant in the IBM Power Systems\u2122 Lab Services organization. Doug's 30+ year career with IBM spans many roles, including product development, technical sales support, Business Intelligence Sales Specialist, and DB2 for i Product Marketing Manager. Doug is a featured speaker at User Group conferences and meetings, IBM Technical Conferences, and Executive Briefings.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [263.0142517089844, 83.7471923828125, 541.1943969726562, 177.92990112304688], "page": 14, "span": [0, 420], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"text": "Tom McKinley is an IBM Lab Services Consultant working on DB2 for IBM i in Rochester MN. His main focus is complex query performance that is associated with Business Intelligence running on Very Large Databases. He worked as a developer or performance analyst in the DB area from 1986 until 2006. Some of his major pieces of work include the Symmetric Multiple processing capabilities of DB2 for IBM i and Large Object Data types. In addition, he was on the original team that designed and built the SQL Query Engine. 
Before his database work, he worked on Licensed Internal Code for System 34 and System 36.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.9324951171875, 584.8549194335938, 541.1551513671875, 715.4559936523438], "page": 15, "span": [0, 608], "__ref_s3_data": null}]}, {"text": "Kent Milligan is a senior DB2 consultant on the DB2 for i Center of Excellence team within the IBM Lab Services and Training organization. His primary responsibility is helping software developers use the latest DB2 technologies and port applications from other databases to DB2 for i. After graduating from the University of Iowa, Kent spent the first eight years of his IBM career as a member of the DB2 development team in Rochester.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [262.8179016113281, 481.3031005859375, 541.2665405273438, 575.6270141601562], "page": 15, "span": [0, 436], "__ref_s3_data": null}]}, {"text": "Thanks to the following people for their contributions to this project:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8874969482422, 442.6785888671875, 432.1602478027344, 453.3590087890625], "page": 15, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "Debra Landon", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.64984130859375, 421.29852294921875, 200.16773986816406, 431.2518615722656], "page": 15, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "International Technical Support Organization, Rochester Center", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.55917358398438, 408.904296875, 438.75390625, 419.7124328613281], "page": 15, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "Craig Aldrich, Mark Anderson, Theresa Euler, Scott Forstie, Chad Olstad IBM Rochester Development", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.283203125, 375.0847473144531, 457.729736328125, 397.47100830078125], "page": 15, 
"span": [0, 97], "__ref_s3_data": null}]}, {"text": "Now you can become a published author, too!", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 331.5992736816406, 413.1525573730469, 347.63641357421875], "page": 15, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "Here's an opportunity to spotlight your skills, grow your career, and become a published author-all at the same time! Join an ITSO residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9495849609375, 233.25941467285156, 547.232666015625, 315.36279296875], "page": 15, "span": [0, 551], "__ref_s3_data": null}]}, {"text": "Find out more about the residency program, browse the residency index, and apply online at: ibm.com /redbooks/residencies.html", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.6442108154297, 194.49908447265625, 546.8164672851562, 221.5010223388672], "page": 15, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "Comments welcome", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.77066040039062, 151.01637268066406, 219.43165588378906, 166.51051330566406], "page": 15, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "Your comments are important to us!", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0441436767578, 123.81623840332031, 294.74969482421875, 134.08399963378906], "page": 15, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "We want our papers to be as helpful as possible. 
Send us your comments about this paper or other IBM Redbooks publications in one of the following ways:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.76272583007812, 90.2786636352539, 547.25244140625, 112.70419311523438], "page": 15, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "GLYPH Use the online Contact us review Redbooks form found at:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5581817626953, 73.23909759521484, 412.55645751953125, 83.14573669433594], "page": 15, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "ibm.com /redbooks", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20013427734375, 56.498329162597656, 231.11917114257812, 65.66831970214844], "page": 15, "span": [0, 17], "__ref_s3_data": null}]}, {"text": "Preface", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [485.1000061035156, 28.136999130249023, 520.88134765625, 37.25944519042969], "page": 15, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "xiii", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [532.9468994140625, 27.93828010559082, 547.1934204101562, 37.73838806152344], "page": 15, "span": [0, 4], "__ref_s3_data": null}]}, {"text": "xiv", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.14579772949219, 27.93828010559082, 81.16200256347656, 37.55492401123047], "page": 16, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [95.48082733154297, 27.832721710205078, 337.05462646484375, 37.28497314453125], "page": 16, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "GLYPH Send your comments in an email to:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69615173339844, 710.8174438476562, 310.3739318847656, 721.4570922851562], 
"page": 16, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "redbooks@us.ibm.com", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.17933654785156, 694.5873413085938, 246.37371826171875, 704.374267578125], "page": 16, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "GLYPH Mail your comments to: IBM Corporation, International Technical Support Organization Dept. HYTD Mail Station P099 2455 South Road Poughkeepsie, NY 12601-5400", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.74884033203125, 624.2797241210938, 426.992431640625, 686.70166015625], "page": 16, "span": [0, 173], "__ref_s3_data": null}]}, {"text": "Stay connected to IBM Redbooks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.5674057006836, 581.0363159179688, 317.6510925292969, 597.2184448242188], "page": 16, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "GLYPH Find us on Facebook:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.58547973632812, 554.25830078125, 246.8371124267578, 563.9945678710938], "page": 16, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "http://www.facebook.com/IBMRedbooks", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.32769775390625, 537.2778930664062, 326.0977478027344, 547.1971435546875], "page": 16, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "GLYPH Follow us on Twitter:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.44610595703125, 520.2987060546875, 241.52239990234375, 530.50732421875], "page": 16, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "http://twitter.com/ibmredbooks", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.69737243652344, 503.3732604980469, 301.0782165527344, 513.1410522460938], "page": 16, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "GLYPH Look for us on LinkedIn:", "type": "paragraph", "name": 
"List-item", "font": null, "prov": [{"bbox": [135.56719970703125, 486.2793273925781, 257.426513671875, 496.61798095703125], "page": 16, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "http://www.linkedin.com/groups?home=&gid=2130806", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.52149963378906, 469.2062072753906, 391.0767822265625, 479.30670166015625], "page": 16, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "GLYPH Explore new Redbooks publications, residencies, and workshops with the IBM Redbooks weekly newsletter:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.64007568359375, 439.531982421875, 546.2178955078125, 461.6893615722656], "page": 16, "span": [0, 118], "__ref_s3_data": null}]}, {"text": "https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.62750244140625, 423.0643005371094, 451.3365478515625, 433.34259033203125], "page": 16, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "GLYPH Stay current on recent Redbooks publications with RSS Feeds:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65512084960938, 406.300537109375, 429.3480529785156, 416.6412658691406], "page": 16, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "http://www.redbooks.ibm.com/rss.html", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.41917419433594, 389.44549560546875, 331.0777282714844, 398.970458984375], "page": 16, "span": [0, 36], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"text": "Chapter 1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 17, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "1", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 
522.6177368164062, 699.4268188476562], "page": 17, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Securing and protecting IBM DB2 data", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.619384765625, 482.1217956542969, 547.3047485351562, 538.820068359375], "page": 17, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.17431640625, 361.6726989746094, 547.2540283203125, 443.9345703125], "page": 17, "span": [0, 648], "__ref_s3_data": null}]}, {"text": "Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0235595703125, 303.58026123046875, 527.206298828125, 349.85009765625], "page": 17, "span": [0, 304], "__ref_s3_data": null}]}, {"text": "This chapter describes how you can secure and protect data in DB2 for i. 
The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7783660888672, 270.1002197265625, 547.1551513671875, 291.9642639160156], "page": 17, "span": [0, 122], "__ref_s3_data": null}]}, {"text": "GLYPH Security fundamentals", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.81155395507812, 252.7996368408203, 250.23167419433594, 263.56298828125], "page": 17, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "GLYPH Current state of IBM i security", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.76536560058594, 240.873291015625, 283.06231689453125, 251.25155639648438], "page": 17, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "GLYPH DB2 for i security controls", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7914276123047, 229.06103515625, 264.8818664550781, 239.46493530273438], "page": 17, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "$^{1 }$http://www.idtheftcenter.org", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [135.72442626953125, 67.0481948852539, 258.362548828125, 77.52366638183594], "page": 17, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "$^{2 }$http://www.ponemon.org /", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [135.91265869140625, 56.83421325683594, 234.79055786132812, 66.7944107055664], "page": 17, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.92715072631836, 27.736194610595703, 257.24334716796875, 37.3647346496582], "page": 17, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "1", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.6627807617188, 27.93828010559082, 547.2176513671875, 37.46987533569336], "page": 17, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "1.1 Security fundamentals", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.72772979736328, 702.0524291992188, 267.40582275390625, 718.4620361328125], "page": 18, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "Before reviewing database security techniques, there are two fundamental steps in securing information assets that must be described:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1598358154297, 664.178466796875, 545.0048217773438, 685.9579467773438], "page": 18, "span": [0, 133], "__ref_s3_data": null}]}, {"text": "GLYPH First, and most important, is the definition of a company's security policy . Without a security policy, there is no definition of what are acceptable practices for using, accessing, and storing information by who, what, when, where, and how. A security policy should minimally address three things: confidentiality, integrity, and availability.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.834716796875, 610.8225708007812, 547.1642456054688, 657.5287475585938], "page": 18, "span": [0, 361], "__ref_s3_data": null}]}, {"text": "The monitoring and assessment of adherence to the security policy determines whether your security strategy is working. Often, IBM security consultants are asked to perform security assessments for companies without regard to the security policy. 
Although these assessments can be useful for observing how the system is defined and how data is being accessed, they cannot determine the level of security without a security policy. Without a security policy, it really is not an assessment as much as it is a baseline for monitoring the changes in the security settings that are captured.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [149.89341735839844, 521.58251953125, 547.2608642578125, 604.0130004882812], "page": 18, "span": [0, 587], "__ref_s3_data": null}]}, {"text": "A security policy is what defines whether the system and its settings are secure (or not).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.36105346679688, 505.0062561035156, 541.9920043945312, 515.3053588867188], "page": 18, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "GLYPH The second fundamental in securing data assets is the use of resource security . If implemented properly, resource security prevents data breaches from both internal and external intrusions. Resource security controls are closely tied to the part of the security policy that defines who should have access to what information resources. 
A hacker might be good enough to get through your company firewalls and sift his way through to your system, but if they do not have explicit access to your database, the hacker cannot compromise your information assets.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.52206420898438, 415.70892333984375, 547.1582641601562, 497.90234375], "page": 18, "span": [0, 573], "__ref_s3_data": null}]}, {"text": "With your eyes now open to the importance of securing information assets, the rest of this chapter reviews the methods that are available for securing database resources on IBM i.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8480987548828, 381.79827880859375, 535.3616943359375, 403.79864501953125], "page": 18, "span": [0, 179], "__ref_s3_data": null}]}, {"text": "1.2 Current state of IBM i security", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 338.5409240722656, 323.3839111328125, 354.3926086425781], "page": 18, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "Because of the inherently secure nature of IBM i, many clients rely on the default system settings to protect their business data that is stored in DB2 for i. In most cases, this means no data protection because the default setting for the Create default public authority (QCRTAUT) system value is *CHANGE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.07699584960938, 275.4822998046875, 547.3182373046875, 322.0619812011719], "page": 18, "span": [0, 306], "__ref_s3_data": null}]}, {"text": "Even more disturbing is that many IBM i clients remain in this state, despite the news headlines and the significant costs that are involved with databases being compromised. This default security configuration makes it quite challenging to implement basic security policies. 
A tighter implementation is required if you really want to protect one of your company's most valuable assets, which is the data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.86489868164062, 206.1400604248047, 547.284423828125, 264.1254577636719], "page": 18, "span": [0, 405], "__ref_s3_data": null}]}, {"text": "Traditionally, IBM i applications have employed menu-based security to counteract this default configuration that gives all users access to the data. The theory is that data is protected by the menu options controlling what database operations that the user can perform. This approach is ineffective, even if the user profile is restricted from running interactive commands. The reason is that in today's connected world there are a multitude of interfaces into the system, from web browsers to PC clients, that bypass application menus. If there are no object-level controls, users of these newer interfaces have an open door to your data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89337158203125, 111.43904876708984, 547.2832641601562, 194.00531005859375], "page": 18, "span": [0, 640], "__ref_s3_data": null}]}, {"text": "2", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.08084869384766, 27.93828010559082, 72.8219985961914, 37.463680267333984], "page": 18, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.60530853271484, 27.763837814331055, 328.7811279296875, 37.33225631713867], "page": 18, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Some clients using this default configuration have toughened their database security with exit-point solutions from third-party vendors. 
IBM i exit points allow a user-written program to be called every time that a particular interface (for example, FTP) is used or an event occurs (for example, a profile is created). Security tools that are based on these exit points increase the level of security on a system by locking down interfaces that are not under the control of menu-based or application authority. In addition, exit-point solutions allow clients to implement more granular security controls, such as allowing users access only to the database during certain hours of the day.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91156005859375, 626.8826904296875, 546.4398193359375, 721.452392578125], "page": 19, "span": [0, 688], "__ref_s3_data": null}]}, {"text": "Although exit-point solutions can provide great benefits, they are not an alternative to object-level control of your databases. Exit-point solutions help secure interfaces, but they do not completely protect the data that is stored in your DB2 objects. Exit points do not exist for every data access interface on the system. Therefore, if an application starts using an unprotected interface, the only thing protecting your data is object-level access control. When your security implementation totally relies on exit points, then it is also important to track any new data interfaces that appear as IBM delivers new releases and products to ensure that your exit-point solution provides coverage for those new interfaces.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.64999389648438, 520.9066772460938, 547.2666015625, 615.0797729492188], "page": 19, "span": [0, 723], "__ref_s3_data": null}]}, {"text": "An exit-point solution is a good option for databases with security holes that are caused by a reliance on the default security setup or menu-based control. However, your security work should not stop there. 
Instead, you must continue to work on a complete database security solution by controlling data access at the object level.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94229125976562, 462.9145812988281, 546.2266235351562, 509.42431640625], "page": 19, "span": [0, 331], "__ref_s3_data": null}]}, {"text": "1.3 DB2 for i security controls", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 419.72991943359375, 295.2049255371094, 435.87127685546875], "page": 19, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "As described in 1.2, \"Current state of IBM i security\" on page 2, object-level controls on your DB2 objects are a critical success factor in securing your business data. Although database object-level security is a strong security feature, some clients have found that object-level security does not have the granularity that is required to adhere to regulatory or compliance policies. A user that is granted object-level access to a DB2 table has the authority to view all of the rows and values in that table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84832763671875, 333.2792053222656, 547.1688842773438, 403.44085693359375], "page": 19, "span": [0, 511], "__ref_s3_data": null}]}, {"text": "As shown in Figure 1-1, it is an all-or-nothing access to the rows of a table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.60513305664062, 310.94268798828125, 466.3186340332031, 321.458984375], "page": 19, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "Figure 1-1 All-or-nothing access to the rows of a table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.33961486816406, 80.503662109375, 354.8553466796875, 89.81105041503906], "page": 19, "span": [0, 55], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"text": "Chapter 1. 
Securing and protecting IBM DB2 data", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [328.4175109863281, 27.881893157958984, 529.1063232421875, 37.1580810546875], "page": 19, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "3", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.21044921875, 27.93828010559082, 547.2176513671875, 37.613685607910156], "page": 19, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "4", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.39376831054688, 27.93828010559082, 72.8219985961914, 37.595481872558594], "page": 20, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.5719223022461, 27.820940017700195, 328.7376708984375, 37.318206787109375], "page": 20, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Many businesses are trying to limit data access to a need-to-know basis. This security goal means that users should be given access only to the minimum set of data that is required to perform their job. Often, users with object-level access are given access to row and column values that are beyond what their business task requires because that object-level security provides an all-or-nothing solution. For example, object-level controls allow a manager to access data about all employees. 
Most security policies limit a manager to accessing data only for the employees that they manage.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9315185546875, 638.8145141601562, 544.3033447265625, 721.4989013671875], "page": 20, "span": [0, 589], "__ref_s3_data": null}]}, {"text": "1.3.1 Existing row and column control", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.61784362792969, 606.2153930664062, 301.4690246582031, 619.5537109375], "page": 20, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Some IBM i clients have tried augmenting the all-or-nothing object-level security with SQL views (or logical files) and application logic, as shown in Figure 1-2. However, application-based logic is easy to bypass with all of the different data access interfaces that are provided by the IBM i operating system, such as Open Database Connectivity (ODBC) and System i Navigator.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.62290954589844, 534.758544921875, 541.5673828125, 593.4801635742188], "page": 20, "span": [0, 377], "__ref_s3_data": null}]}, {"text": "Using SQL views to limit access to a subset of the data in a table also has its own set of challenges. First, there is the complexity of managing all of the SQL view objects that are used for securing data access. 
Second, scaling a view-based security solution can be difficult as the amount of data grows and the number of users increases.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.88414001464844, 476.91424560546875, 547.4407958984375, 523.4513549804688], "page": 20, "span": [0, 340], "__ref_s3_data": null}]}, {"text": "Even if you are willing to live with these performance and management issues, a user with *ALLOBJ access still can directly access all of the data in the underlying DB2 table and easily bypass the security controls that are built into an SQL view.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.26145935058594, 430.86553955078125, 547.232666015625, 465.4564514160156], "page": 20, "span": [0, 247], "__ref_s3_data": null}]}, {"text": "Figure 1-2 Existing row and column controls", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1416778564453, 91.801513671875, 316.93792724609375, 101.39622497558594], "page": 20, "span": [0, 43], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"text": "1.3.2 New controls: Row and Column Access Control", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 708.67724609375, 394.39227294921875, 721.4375], "page": 21, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "Based on the challenges that are associated with the existing technology available for controlling row and column access at a more granular level, IBM delivered new security support in the IBM i 7.2 release; this support is known as Row and Column Access Control (RCAC).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84750366210938, 649.1361083984375, 539.9359130859375, 695.3776245117188], "page": 21, "span": [0, 270], "__ref_s3_data": null}]}, {"text": "The new DB2 RCAC support provides a method for controlling data access across all interfaces and all types of 
users with a data-centric solution. Moving security processing to the database layer makes it easier to build controls that meet your compliance policies. The RCAC support provides an additional layer of security that complements object-level authorizations to limit data access to a need-to-know basis. Therefore, it is critical that you first have a sound object-level security implementation in place.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7799530029297, 567.0514526367188, 542.2587280273438, 637.3985595703125], "page": 21, "span": [0, 514], "__ref_s3_data": null}]}, {"text": "Chapter 1. Securing and protecting IBM DB2 data", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [328.35748291015625, 27.924795150756836, 529.1063232421875, 37.201438903808594], "page": 21, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "5", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.4357299804688, 27.93828010559082, 547.2176513671875, 37.7031135559082], "page": 21, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "6", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.66896057128906, 27.93828010559082, 72.8219985961914, 37.446083068847656], "page": 22, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.79338073730469, 27.71957778930664, 328.8094177246094, 37.3686637878418], "page": 22, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"text": "Chapter 2.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 23, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "2", "type": "page-header", "name": "Page-header", "font": null, "prov": [{"bbox": [500.3999938964844, 
661.8682861328125, 522.6177368164062, 698.831298828125], "page": 23, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Roles and separation of duties", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 512.864501953125, 515.1311645507812, 538.6773681640625], "page": 23, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "One of the primary objectives of row and column access control (RCAC) is to create data security policies that control and govern user access to data and limit the data access of DB2 designers and administrators to only the minimum that is required to do their jobs.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.04757690429688, 441.0284118652344, 547.3323974609375, 475.5329895019531], "page": 23, "span": [0, 266], "__ref_s3_data": null}]}, {"text": "To accomplish these tasks, RCAC engineers devised a set of functional roles that, as a group, implement effectively data access requirements and also limit the span of control of each role so that each role is given only the authorities that are needed to perform its specific set of tasks.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.86463928222656, 383.07965087890625, 547.2574462890625, 429.3913269042969], "page": 23, "span": [0, 290], "__ref_s3_data": null}]}, {"text": "This chapter describes the concepts of roles and separation of duties on DB2 for i and covers the following topics:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8092803955078, 349.0167541503906, 547.2655029296875, 371.5411071777344], "page": 23, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "GLYPH Roles", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.91671752929688, 332.0804748535156, 176.71270751953125, 342.05181884765625], "page": 23, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "GLYPH Separation of duties", "type": "paragraph", "name": 
"List-item", "font": null, "prov": [{"bbox": [135.62644958496094, 319.62664794921875, 239.9706268310547, 329.89959716796875], "page": 23, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.97868728637695, 27.77071189880371, 257.24334716796875, 37.37629318237305], "page": 23, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "7", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [540.9383544921875, 27.93828010559082, 547.2549438476562, 37.7381591796875], "page": 23, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "2.1 Roles", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.08222961425781, 702.8963012695312, 139.42576599121094, 718.1371459960938], "page": 24, "span": [0, 9], "__ref_s3_data": null}]}, {"text": "Traditionally, data access roles are defined in a binary way, where access to the data is either not permitted or access to the data is permitted. A full access capability can also be instantiated by the *ALLOBJ special authority, either explicitly or implicitly, for the security officer. If you hold the role of security officer, or have all *ALLOBJ special authority, you have access to all the data, with no exceptions. Unfortunately, this might not meet the organization's requirements for limiting access to data or separation of duties.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.79855346679688, 615.6634521484375, 547.2965087890625, 686.0182495117188], "page": 24, "span": [0, 543], "__ref_s3_data": null}]}, {"text": "To assist with defining roles and the separation of duties with appropriate authority, IBM i provides function usage IDs . 
A function usage ID implements granular security controls rather than granting users powerful special authorities, such as all object, job control, or service.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95726013183594, 569.6976928710938, 547.2587890625, 603.8743286132812], "page": 24, "span": [0, 282], "__ref_s3_data": null}]}, {"text": "Roles are divided among the following DB2 functions and their corresponding function usage IDs:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.39627075195312, 536.1390380859375, 547.3123779296875, 558.0423583984375], "page": 24, "span": [0, 95], "__ref_s3_data": null}]}, {"text": "GLYPH DDM and IBM DRDAfi application server access: QIBM_DB_DDMDRDA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.72689819335938, 519.0963745117188, 474.09234619140625, 529.4824829101562], "page": 24, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "GLYPH Toolbox application server access: QIBM_DB_ZDA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.54745483398438, 506.9635925292969, 375.98358154296875, 517.3781127929688], "page": 24, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "GLYPH Database Administrator function: QIBM_DB_SQLADM", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6115264892578, 495.15960693359375, 391.5639953613281, 505.5315246582031], "page": 24, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH Database Information function: QIBM_DB_SYSMON", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.43470764160156, 483.1597900390625, 383.83270263671875, 493.47198486328125], "page": 24, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "GLYPH Security Administrator function: QIBM_DB_SECADM", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6698455810547, 471.068603515625, 385.5570983886719, 
481.8918762207031], "page": 24, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.01905059814453, 438.3138732910156, 501.0556335449219, 451.5218200683594], "page": 24, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "The QIBM_DB_DDMDRDA function usage ID restricts access to the DDM and DRDA application server (QRWTSRVR). This function usage ID provides an easy alternative (rather than writing an exit program) to control access to DDM and DRDA from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.79649353027344, 366.81915283203125, 547.2295532226562, 425.7265625], "page": 24, "span": [0, 392], "__ref_s3_data": null}]}, {"text": "This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77101135253906, 332.93585205078125, 534.9490356445312, 355.3209533691406], "page": 24, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "2.1.2 Toolbox application server access: QIBM_DB_ZDA", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.1021957397461, 300.5572814941406, 413.2480773925781, 313.5638732910156], "page": 24, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "The QIBM_DB_ZDA function usage ID restricts access to the optimized server that handles DB2 requests from clients (QZDASOINIT and QZDASSINIT). 
Server access is used by the ODBC, OLE DB, and .NET providers that ship with IBM i Access for Windows and JDBC Toolbox, Run SQL scripts, and other parts of System i Navigator and Navigator for i Web console.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9031524658203, 229.11941528320312, 543.151123046875, 287.4146728515625], "page": 24, "span": [0, 350], "__ref_s3_data": null}]}, {"text": "This function usage ID provides an easy alternative (rather than writing an exit program) to control access to these functions from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.73422241210938, 170.7083740234375, 546.2078247070312, 217.42959594726562], "page": 24, "span": [0, 289], "__ref_s3_data": null}]}, {"text": "This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.910400390625, 136.91490173339844, 534.9490966796875, 159.28839111328125], "page": 24, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "8", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.47502899169922, 27.93828010559082, 72.8219985961914, 37.502052307128906], "page": 24, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.709228515625, 27.739971160888672, 328.7253723144531, 37.31616973876953], "page": 24, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2.1.3 Database Administrator function: QIBM_DB_SQLADM", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.19741821289062, 708.67724609375, 433.47052001953125, 721.6392211914062], "page": 25, 
"span": [0, 53], "__ref_s3_data": null}]}, {"text": "The Database Administrator function (QIBM_DB_SQLADM) is needed whenever a user is analyzing and viewing SQL performance data. Some of the more common database administrator functions include displaying statements from the SQL Plan Cache, analyzing SQL Performance Monitors and SQL Plan Cache Snapshots, and displaying the SQL details of a job other than your own.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8431854248047, 636.6989135742188, 547.0184326171875, 695.5401000976562], "page": 25, "span": [0, 363], "__ref_s3_data": null}]}, {"text": "The Database Administrator function provides an alternative to granting *JOBCTL, but simply having the Database Administrator authorization does not carry with it all the needed object authorities for every administration task. The default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77134704589844, 590.8430786132812, 547.3245239257812, 625.426025390625], "page": 25, "span": [0, 274], "__ref_s3_data": null}]}, {"text": "To perform database administrator tasks that are not related to performance analysis, you must refer to the details of the task to determine its specific authorization requirements. 
For example, to allow a database administrator to reorganize a table, the DBA must have additional object authorities to the table that are not covered by QIBM_DB_SQLADM.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.922607421875, 532.9749755859375, 541.2985229492188, 579.3148193359375], "page": 25, "span": [0, 352], "__ref_s3_data": null}]}, {"text": "Granting QIBM_DB_SQLADM function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.28472900390625, 505.07904052734375, 392.7084045410156, 517.3775024414062], "page": 25, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Administration functions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.11285400390625, 480.2784729003906, 532.0657348632812, 502.6061706542969], "page": 25, "span": [0, 132], "__ref_s3_data": null}]}, {"text": "2.1.4 Database Information function: QIBM_DB_SYSMON", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.2179183959961, 447.6772766113281, 419.47637939453125, 460.86566162109375], "page": 25, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "The Database Information function (QIBM_DB_SYSMON) provides much less authority than Database Administrator function. Its primary use allows a user to examine high-level database properties.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95957946777344, 400.29864501953125, 547.1928100585938, 434.8374328613281], "page": 25, "span": [0, 190], "__ref_s3_data": null}]}, {"text": "For example, a user that does not have *JOBCTL or QIBM_DB_SQLADM can still view the SQL Plan Cache properties if granted authority to QIBM_DB_SYSMON. 
Without granting this authority, the default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2222900390625, 354.16015625, 547.2994384765625, 388.70263671875], "page": 25, "span": [0, 229], "__ref_s3_data": null}]}, {"text": "Granting QIBM_DB_SYSMON function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.3820037841797, 326.02716064453125, 392.7384033203125, 338.0588684082031], "page": 25, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Information functions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.12899780273438, 301.23870849609375, 532.0657348632812, 323.3178405761719], "page": 25, "span": [0, 129], "__ref_s3_data": null}]}, {"text": "2.1.5 Security Administrator function: QIBM_DB_SECADM", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.09640502929688, 268.63739013671875, 427.0501403808594, 281.47467041015625], "page": 25, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "The Security Administrator function (QIBM_DB_SECADM) grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.72735595703125, 208.98062133789062, 538.3322143554688, 255.28724670410156], "page": 25, "span": [0, 290], "__ref_s3_data": null}]}, {"text": "Only those users with the QIBM_DB_SECADM function can administer and manage RCAC rules. RCAC can be used to prevent even users with *ALLOBJ authority from freely accessing all the data in a protected database. 
These users are excluded from data access unless they are specifically authorized by RCAC. Without granting this authority, the default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85955810546875, 138.8216094970703, 547.2896118164062, 197.575927734375], "page": 25, "span": [0, 380], "__ref_s3_data": null}]}, {"text": "Granting QIBM_DB_SECADM function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.57452392578125, 111.07710266113281, 392.72161865234375, 123.48632049560547], "page": 25, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Only QSECOFR or a user with *SECADM special authority can grant the QIBM_DB_SECADM function usage to a user or group.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.02122497558594, 85.55487823486328, 460.46807861328125, 108.20436096191406], "page": 25, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "Chapter 2. 
Roles and separation of duties", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [360.43804931640625, 27.703941345214844, 529.1567993164062, 37.179229736328125], "page": 25, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "9", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.2987060546875, 27.93828010559082, 547.2176513671875, 37.731666564941406], "page": 25, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "2.1.6 Change Function Usage CL command", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.13709259033203, 708.0068969726562, 335.4955139160156, 721.5980834960938], "page": 26, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "The following CL commands can be used to work with, display, or change function usage IDs:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.92727661132812, 684.818115234375, 547.284423828125, 695.4608764648438], "page": 26, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "GLYPH Work Function Usage ( WRKFCNUSG )", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.50738525390625, 667.6542358398438, 301.5174865722656, 678.104248046875], "page": 26, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Change Function Usage ( CHGFCNUSG )", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.541015625, 655.2158203125, 313.39776611328125, 666.201416015625], "page": 26, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH Display Function Usage ( DSPFCNUSG )", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.42544555664062, 643.4954223632812, 310.8171081542969, 654.1441040039062], "page": 26, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "For example, the following CHGFCNUSG command shows granting authorization to user HBEDOYA to administer and manage RCAC rules:", "type": "paragraph", "name": "Text", 
"font": null, "prov": [{"bbox": [136.18296813964844, 610.2984619140625, 512.5380249023438, 632.472412109375], "page": 26, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED)", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.22215270996094, 592.9798583984375, 441.59686279296875, 603.69287109375], "page": 26, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [63.92118835449219, 560.307373046875, 544.4754638671875, 573.6224365234375], "page": 26, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "The FUNCTION_USAGE view contains function usage configuration details. Table 2-1 describes the columns in the FUNCTION_USAGE view.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.69790649414062, 525.2785034179688, 519.5179443359375, 547.3473510742188], "page": 26, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "Table 2-1 FUNCTION_USAGE view", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.8000030517578, 504.11700439453125, 285.07135009765625, 513.564697265625], "page": 26, "span": [0, 29], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"text": "To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81417846679688, 318.2784729003906, 547.2803955078125, 340.825439453125], "page": 26, "span": [0, 112], "__ref_s3_data": null}]}, {"text": "Example 2-1 Query to determine who has authority to define and manage RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.5948028564453, 296.4012145996094, 463.2222900390625, 306.1437072753906], "page": 26, "span": [0, 74], 
"__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/5"}, {"text": "2.2 Separation of duties", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.28413391113281, 156.46859741210938, 249.59605407714844, 172.32984924316406], "page": 26, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95748901367188, 81.68995666503906, 547.2234497070312, 140.11083984375], "page": 26, "span": [0, 463], "__ref_s3_data": null}]}, {"text": "10", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.75320434570312, 27.93828010559082, 78.4020004272461, 37.570556640625], "page": 26, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.40293884277344, 27.698881149291992, 334.4214172363281, 37.30914306640625], "page": 26, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). 
This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.11419677734375, 651.2788696289062, 542.6943359375, 721.3893432617188], "page": 27, "span": [0, 516], "__ref_s3_data": null}]}, {"text": "In IBM i 7.2, the QIBM_DB_SECADM function usage grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9490509033203, 593.2598266601562, 547.303955078125, 639.5579833984375], "page": 27, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "QIBM_DB_SECADM function usage can be granted only by a user with *SECADM special authority and can be given to a user or a group.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1680145263672, 558.7716064453125, 538.6507568359375, 581.5887451171875], "page": 27, "span": [0, 129], "__ref_s3_data": null}]}, {"text": "QIBM_DB_SECADM also is responsible for administering RCAC, which restricts which rows a user is allowed to access in a table and whether a user is allowed to see information in certain columns of a table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.04412841796875, 513.281005859375, 545.7960205078125, 547.52294921875], "page": 27, "span": [0, 204], "__ref_s3_data": null}]}, {"text": "A preferred practice is that the RCAC administrator has the QIBM_DB_SECADM function usage ID, but absolutely no other data privileges. 
The result is that the RCAC administrator can deploy and maintain the RCAC constructs, but cannot grant themselves unauthorized access to data itself.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91064453125, 455.2619934082031, 539.80712890625, 501.6045837402344], "page": 27, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "Table 2-2 shows a comparison of the different function usage IDs and *JOBCTL authority to the different CL commands and DB2 for i tools.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.74993896484375, 421.3023681640625, 543.067138671875, 443.2637634277344], "page": 27, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.80000305175781, 399.966552734375, 392.8609619140625, 409.5616149902344], "page": 27, "span": [0, 78], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/6"}, {"text": "Chapter 2. 
Roles and separation of duties", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [354.5693359375, 27.851680755615234, 523.5407104492188, 37.16465377807617], "page": 27, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "11", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.497520446777344], "page": 27, "span": [0, 2], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/7"}, {"text": "12", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.78636169433594, 27.93828010559082, 78.4020004272461, 37.45695877075195], "page": 28, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.417724609375, 27.724334716796875, 334.4214172363281, 37.30672836303711], "page": 28, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/20"}, {"text": "Chapter 3.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 29, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "3", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.859619140625], "page": 29, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 513.0821533203125, 546.0291748046875, 538.6365356445312], "page": 29, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "This chapter describes what Row and Column Access Control (RCAC) is, its components, and then illustrates RCAC with a simple example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": 
[135.7967987060547, 453.0984802246094, 536.1522827148438, 475.3611755371094], "page": 29, "span": [0, 133], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1170654296875, 431.07891845703125, 347.4121398925781, 441.4175109863281], "page": 29, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Explanation of RCAC and the concept of access control", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55429077148438, 413.6905822753906, 397.1086730957031, 423.97503662109375], "page": 29, "span": [0, 69], "__ref_s3_data": null}]}, {"text": "GLYPH Special registers and built-in global variables", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65321350097656, 401.5838623046875, 348.458984375, 412.17449951171875], "page": 29, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH VERIFY_GROUP_FOR_USER function", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7012939453125, 389.8255310058594, 327.0360107421875, 400.37957763671875], "page": 29, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "GLYPH Establishing and controlling accessibility by using the RCAC rule text", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.52816772460938, 377.255859375, 454.2698669433594, 388.0998229980469], "page": 29, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "GLYPH SELECT, INSERT, and UPDATE behavior with RCAC", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.47698974609375, 366.099853515625, 385.8156433105469, 376.4607238769531], "page": 29, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "GLYPH Human resources example", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5319061279297, 354.047119140625, 270.3636169433594, 364.00299072265625], "page": 29, "span": [0, 39], 
"__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.9825439453125, 27.802661895751953, 257.24334716796875, 37.31828308105469], "page": 29, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "13", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.526145935058594], "page": 29, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "3.1 Explanation of RCAC and the concept of access control", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.37876892089844, 702.5391845703125, 518.7757568359375, 718.1989135742188], "page": 30, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "RCAC limits data access to those users who have a business \"need to know\". RCAC makes it easy to set up a rich and robust security policy that is based on roles and responsibilities. 
RCAC functionality is made available through the optional, no charge feature called \"IBM Advanced Data Security for i\", also known as option 47 of IBM i 7.2.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.11453247070312, 639.6259765625, 547.24267578125, 686.0366821289062], "page": 30, "span": [0, 340], "__ref_s3_data": null}]}, {"text": "In DB2 for i, RCAC is implemented using two different approaches that address the shortcomings of traditional control methods and mechanisms:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0245819091797, 605.769287109375, 505.92376708984375, 627.9368896484375], "page": 30, "span": [0, 141], "__ref_s3_data": null}]}, {"text": "GLYPH Row permissions", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.82949829101562, 589.0179443359375, 227.34536743164062, 599.0405883789062], "page": 30, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "GLYPH Column masks", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7431182861328, 577.1798095703125, 217.25091552734375, 587.1215209960938], "page": 30, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "Another benefit of RCAC is that no database user is automatically exempt from the control. Users with *ALLOBJ authority can no longer freely access all of the data in the database unless they have the appropriate permission to do so. The ability to manage row permissions and column masks rests with the database security administrator. The RCAC definitions, enablement, and activation are controlled by SQL statements.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83267211914062, 506.98077392578125, 547.2017822265625, 565.4414672851562], "page": 30, "span": [0, 419], "__ref_s3_data": null}]}, {"text": "Row permissions and column masks require virtually no application changes. 
RCAC is based on specific rules that are transparent to existing applications and SQL interfaces. Enforcement of your security policy does not depend on how applications or tools access the data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1253204345703, 449.1419372558594, 547.2973022460938, 495.46649169921875], "page": 30, "span": [0, 270], "__ref_s3_data": null}]}, {"text": "RCAC also facilitates multi-tenancy, which means that several independent customers or business units can share a single database table without being aware of one another. The RCAC row permission ensures each user sees only the rows they are entitled to view because the enforcement is handled by DB2 and not the application logic.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.33770751953125, 391.0147705078125, 535.672119140625, 437.35345458984375], "page": 30, "span": [0, 331], "__ref_s3_data": null}]}, {"text": "Label-based access control (LBAC): RCAC and LBAC are not the same thing. LBAC is a security model that is primarily intended for government applications. LBAC requires that data and users be classified with a fixed set of rules that are implemented. RCAC is a general-purpose security model that is primarily intended for commercial customers. 
You can use RCAC to create your own security rules, which in turn allows for more flexibility.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [141.874267578125, 314.53131103515625, 541.2496337890625, 373.4546813964844], "page": 30, "span": [0, 438], "__ref_s3_data": null}]}, {"text": "3.1.1 Row permission and column mask definitions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.27526092529297, 277.1085205078125, 383.4799499511719, 290.117431640625], "page": 30, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "The following sections define row permission and column masks.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.13406372070312, 253.6727752685547, 423.5354309082031, 264.1443786621094], "page": 30, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "Row permission", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.67124938964844, 226.04348754882812, 229.260009765625, 238.01478576660156], "page": 30, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "A row permission is a database object that manifests a row access control rule for a specific table. It is essentially a search condition that describes which rows you can access. 
For example, a manager can see only the rows that represent his or her employees.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.80856323242188, 189.06199645996094, 544.5714721679688, 223.27346801757812], "page": 30, "span": [0, 261], "__ref_s3_data": null}]}, {"text": "14", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.69788360595703, 27.93828010559082, 78.4020004272461, 37.5636100769043], "page": 30, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.36235809326172, 27.683774948120117, 334.4214172363281, 37.30181884765625], "page": 30, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "The SQL CREATE PERMISSION statement that is shown in Figure 3-1 is used to define and initially enable or disable the row access rules.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.776123046875, 699.2013549804688, 528.7305908203125, 721.4013061523438], "page": 31, "span": [0, 135], "__ref_s3_data": null}]}, {"text": "Figure 3-1 CREATE PERMISSION SQL statement", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0135498046875, 369.07928466796875, 342.5756530761719, 378.6520080566406], "page": 31, "span": [0, 42], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/21"}, {"text": "Column mask", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.67723083496094, 340.95599365234375, 215.9995574951172, 352.8724060058594], "page": 31, "span": [0, 11], "__ref_s3_data": null}]}, {"text": "A column mask is a database object that manifests a column value access control rule for a specific column in a specific table. It uses a CASE expression that describes what you see when you access the column. 
For example, a teller can see only the last four digits of a tax identification number.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.825439453125, 291.6988525390625, 542.7664794921875, 338.13787841796875], "page": 31, "span": [0, 297], "__ref_s3_data": null}]}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.59918212890625, 27.955989837646484, 523.6016235351562, 37.184593200683594], "page": 31, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "15", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.46247100830078], "page": 31, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "16", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.6975326538086, 27.93828010559082, 78.4020004272461, 37.4251594543457], "page": 32, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41210174560547, 27.731887817382812, 334.4214172363281, 37.32145690917969], "page": 32, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Column masks replace the need to create and use views to implement access control. 
The SQL CREATE MASK statement that is shown in Figure 3-2 is used to define and initially enable or disable the column value access rules.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9098358154297, 687.2786865234375, 546.5693359375, 721.3076171875], "page": 32, "span": [0, 221], "__ref_s3_data": null}]}, {"text": "Figure 3-2 CREATE MASK SQL statement", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.08889770507812, 365.8803405761719, 311.9504089355469, 375.2521667480469], "page": 32, "span": [0, 36], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/22"}, {"text": "3.1.2 Enabling and activating RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.18714141845703, 332.8692932128906, 286.7970275878906, 346.2061767578125], "page": 32, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "You can enable, disable, or regenerate row permissions and column masks by using the SQL ALTER PERMISSION statement and the SQL ALTER MASK statement, as shown in Figure 3-3 on page 17.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.41273498535156, 285.53582763671875, 547.1828002929688, 320.0932312011719], "page": 32, "span": [0, 184], "__ref_s3_data": null}]}, {"text": "Enabling and disabling effectively turns on or off the logic that is contained in the row permission or column mask. Regenerating causes the row permission or column mask to be regenerated. The row permission definition in the catalog is used and existing dependencies and authorizations, if any, are retained. The row permission definition is reevaluated as though the row permission were being created. Any user-defined functions (UDFs) that are referenced in the row permission must be resolved to the same secure UDFs as were resolved during the original row permission or column mask creation. 
The regenerate option can be used to ensure that the RCAC logic is intact and still valid before any user attempts to access the table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.00491333007812, 168.1006622314453, 547.2286376953125, 274.0113830566406], "page": 32, "span": [0, 734], "__ref_s3_data": null}]}, {"text": "Note: An exclusive lock is required on the table object to perform the alter operation. All open cursors must be closed.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.18338012695312, 127.9096450805664, 531.8515625, 149.98568725585938], "page": 32, "span": [0, 120], "__ref_s3_data": null}]}, {"text": "Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.984130859375, 432.4007263183594, 415.3210144042969, 442.02081298828125], "page": 33, "span": [0, 57], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/23"}, {"text": "You can activate and deactivate RCAC for new or existing tables by using the SQL ALTER TABLE statement (Figure 3-4). The ACTIVATE or DEACTIVATE clause must be the option that is specified in the statement. No other alterations are permitted at the same time. The activating and deactivating effectively turns on or off all RCAC processing for the table. Only enabled row permissions and column masks take effect when activating RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7842254638672, 357.56317138671875, 547.2994995117188, 416.1618347167969], "page": 33, "span": [0, 433], "__ref_s3_data": null}]}, {"text": "Note: An exclusive lock is required on the table object to perform the alter operation. 
All open cursors must be closed.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.0438995361328, 317.40087890625, 531.8515625, 339.8829650878906], "page": 33, "span": [0, 120], "__ref_s3_data": null}]}, {"text": "Figure 3-4 ALTER TABLE SQL statement", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.43646240234375, 57.76702117919922, 306.6454772949219, 67.19706726074219], "page": 33, "span": [0, 36], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/24"}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.5545349121094, 27.893524169921875, 523.6016235351562, 37.2478141784668], "page": 33, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "17", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.54359436035156], "page": 33, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "18", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.6755142211914, 27.93828010559082, 78.4020004272461, 37.471988677978516], "page": 34, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.40727233886719, 27.745635986328125, 334.4214172363281, 37.298118591308594], "page": 34, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "When row access control is activated on a table, a default permission is established for that table. The name of this permission is QIBM_DEFAULT_ _. This default permission contains a simple piece of logic (0=1) which is never true. The default permission effectively denies access to every user unless there is a permission defined that allows access explicitly. 
If row access control is activated on a table, and there is no permission that is defined, no one has permission to any rows. All queries against the table produce an empty set.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6855926513672, 638.6604614257812, 547.273681640625, 721.4677124023438], "page": 34, "span": [0, 566], "__ref_s3_data": null}]}, {"text": "It is possible to define, create, and enable multiple permissions on a table. Logically, all of the permissions are ORed together to form a comprehensive test of the user's ability to access the data. A column can have only one mask that is defined over it. From an implementation standpoint, it does not matter if you create the column masks first or the row permissions first.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01759338378906, 580.7723388671875, 547.2686157226562, 627.36962890625], "page": 34, "span": [0, 378], "__ref_s3_data": null}]}, {"text": "Note: If a user does not have permission to access the row, the column mask logic is not invoked.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.60574340820312, 541.2984619140625, 537.4868774414062, 563.368896484375], "page": 34, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "3.2 Special registers and built-in global variables", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3016128540039, 492.1618347167969, 438.7572021484375, 508.3872985839844], "page": 34, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "This section describes how you can use special registers and built-in global variables to implement RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.88925170898438, 454.2984619140625, 525.5018920898438, 476.74810791015625], "page": 34, "span": [0, 105], "__ref_s3_data": null}]}, {"text": "3.2.1 Special registers", "type": "subtitle-level-1", "name": "Section-header", "font": null, 
"prov": [{"bbox": [64.38701629638672, 420.9353942871094, 204.5852813720703, 434.2147216796875], "page": 34, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "A special register is a storage area that is defined for an application process by DB2 and is used to store information that can be referenced in SQL statements. A reference to a special register is a reference to a value that is provided by the current server.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0377197265625, 373.72930908203125, 547.1063842773438, 407.9320983886719], "page": 34, "span": [0, 261], "__ref_s3_data": null}]}, {"text": "IBM DB2 for i supports four different special registers that can be used to identify what user profiles are relevant to determining object authorities in the current connection to the server. SQL uses the term runtime authorization ID , which corresponds to a user profile on DB2 for i. Here are the four special registers:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.25625610351562, 316.2984619140625, 544.5763549804688, 362.75390625], "page": 34, "span": [0, 323], "__ref_s3_data": null}]}, {"text": "GLYPH USER is the runtime user profile that determines the object authorities for the current connection to the server. It has a data type of VARCHAR(18). 
This value can be changed by the SQL statement SET SESSION AUTHORIZATION .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56118774414062, 274.8968200683594, 546.981201171875, 309.5057067871094], "page": 34, "span": [0, 239], "__ref_s3_data": null}]}, {"text": "GLYPH SESSION_USER is the same as the USER register, except that it has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4746551513672, 246.278564453125, 522.5455932617188, 268.92169189453125], "page": 34, "span": [0, 110], "__ref_s3_data": null}]}, {"text": "GLYPH CURRENT USER was added in IBM i 7.2 and is similar to the USER register, but it has one important difference in that it also reports adopted authority. High-level language programs and SQL routines such as functions, procedures, and triggers can optionally be created to run using either the caller's or the owner's user profile to determine data authorities. For example, an SQL procedure can be created to run under the owner's authority by specifying SET OPTION USRPRF=*OWNER . This special register can also be referenced as CURRENT_USER. It has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55592346191406, 157.2052459716797, 547.2483520507812, 239.66249084472656], "page": 34, "span": [0, 594], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_USER is the user profile that initiates the connection to the server. It is not used by RCAC, but is included here for completeness. Many jobs, including the QZDASOINIT prestarted jobs, initially connect to the server with a default user profile and then change to use some other user profile. SYSTEM_USER reports this value, typically QUSER for a QZDASOINIT job. 
It has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.612060546875, 91.61019897460938, 547.2650756835938, 150.18948364257812], "page": 34, "span": [0, 422], "__ref_s3_data": null}]}, {"text": "In addition to these four special registers, any of the DB2 special registers can be referenced as part of the rule text.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0863800048828, 57.777366638183594, 547.2014770507812, 80.15410614013672], "page": 34, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "Table 3-1 summarizes these special registers and their values.", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [135.83859252929688, 710.943603515625, 412.20758056640625, 721.3551635742188], "page": 35, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "Table 3-1 Special registers and their corresponding values", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.8000030517578, 690.0230102539062, 373.028076171875, 699.3997802734375], "page": 35, "span": [0, 58], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/8"}, {"text": "Figure 3-5 shows the difference in the special register values when an adopted authority is used:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2273712158203, 556.2984619140625, 538.493896484375, 578.5513305664062], "page": 35, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "GLYPH A user connects to the server using the user profile ALICE.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69070434570312, 538.869384765625, 411.36138916015625, 549.4674682617188], "page": 35, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "GLYPH USER and CURRENT USER initially have the same value of ALICE.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.39437866210938, 522.23193359375, 
453.2580871582031, 532.7328491210938], "page": 35, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "GLYPH ALICE calls an SQL procedure that is named proc1, which is owned by user profile JOE and was created to adopt JOE's authority when it is called.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.37547302246094, 492.69061279296875, 541.4498291015625, 515.4265747070312], "page": 35, "span": [0, 160], "__ref_s3_data": null}]}, {"text": "GLYPH While the procedure is running, the special register USER still contains the value of ALICE because it excludes any adopted authority. The special register CURRENT USER contains the value of JOE because it includes any adopted authority.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.46484375, 451.6559143066406, 547.2167358398438, 486.2220153808594], "page": 35, "span": [0, 253], "__ref_s3_data": null}]}, {"text": "GLYPH When proc1 ends, the session reverts to its original state with both USER and CURRENT USER having the value of ALICE.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61715698242188, 423.015869140625, 547.3540649414062, 445.505615234375], "page": 35, "span": [0, 133], "__ref_s3_data": null}]}, {"text": "Figure 3-5 Special registers and adopted authority", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.28187561035156, 186.33480834960938, 342.36993408203125, 195.70782470703125], "page": 35, "span": [0, 50], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/25"}, {"text": "3.2.2 Built-in global variables", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.09709930419922, 154.2414093017578, 247.02536010742188, 167.46414184570312], "page": 35, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "Built-in global variables are provided with the database manager and are used in SQL statements to retrieve scalar values 
that are associated with the variables.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.147705078125, 119.0784683227539, 518.0011596679688, 141.2449493408203], "page": 35, "span": [0, 161], "__ref_s3_data": null}]}, {"text": "IBM DB2 for i supports nine different built-in global variables that are read only and maintained by the system. These global variables can be used to identify attributes of the database connection and used as part of the RCAC logic.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06741333007812, 72.93258666992188, 532.3385009765625, 107.38182830810547], "page": 35, "span": [0, 233], "__ref_s3_data": null}]}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.6591491699219, 27.951305389404297, 523.6016235351562, 37.228702545166016], "page": 35, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "19", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.58589172363281], "page": 35, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "20", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.01134490966797, 27.93828010559082, 78.4020004272461, 37.716331481933594], "page": 36, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.40630340576172, 27.696765899658203, 334.4214172363281, 37.33338928222656], "page": 36, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Table 3-2 lists the nine built-in global variables.", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [135.99867248535156, 711.0256958007812, 342.5477294921875, 721.4012451171875], "page": 36, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "Table 3-2 Built-in global 
variables", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.80000305175781, 690.177001953125, 201.90721130371094, 699.1590576171875], "page": 36, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/9"}, {"text": "3.3 VERIFY_GROUP_FOR_USER function", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.28919219970703, 455.0362854003906, 384.3638916015625, 471.1539611816406], "page": 36, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "The VERIFY_GROUP_FOR_USER function was added in IBM i 7.2. Although it is primarily intended for use with RCAC permissions and masks, it can be used in other SQL statements. The first parameter must be one of these three special registers: SESSION_USER, USER, or CURRENT_USER. The second and subsequent parameters are a list of user or group profiles. Each of these values must be 1 - 10 characters in length. These values are not validated for their existence, which means that you can specify the names of user profiles that do not exist without receiving any kind of error.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.70318603515625, 355.5380859375, 547.2347412109375, 438.36163330078125], "page": 36, "span": [0, 576], "__ref_s3_data": null}]}, {"text": "If a special register value is in the list of user profiles or it is a member of a group profile included in the list, the function returns a long integer value of 1. Otherwise, it returns a value of 0. 
It never returns the null value.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1623077392578, 310.2999572753906, 547.2573852539062, 344.6438293457031], "page": 36, "span": [0, 235], "__ref_s3_data": null}]}, {"text": "Here is an example of using the VERIFY_GROUP_FOR_USER function:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.18377685546875, 288.2803955078125, 458.44525146484375, 298.98516845703125], "page": 36, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "1. There are user profiles for MGR, JANE, JUDY, and TONY.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80001831054688, 271.2185363769531, 406.0775146484375, 281.7854919433594], "page": 36, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "2. The user profile JANE specifies a group profile of MGR.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.9613494873047, 253.6958770751953, 396.9881591796875, 264.2366027832031], "page": 36, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "3. 
If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.15640258789062, 225.28138732910156, 536.568603515625, 247.44735717773438], "page": 36, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [150.25143432617188, 149.68975830078125, 451.01605224609375, 217.97032165527344], "page": 36, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "3.4 Establishing and controlling accessibility by using the RCAC rule text", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.63925170898438, 687.0557861328125, 512.90087890625, 721.5410766601562], "page": 37, "span": [0, 74], "__ref_s3_data": null}]}, {"text": "When defining a row permission or column mask, the \"magic\" of establishing and controlling accessibility comes from the rule text . The rule text represents the search criteria and logic that is implemented by the database engine.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.79978942871094, 636.2785034179688, 544.9019775390625, 670.8543701171875], "page": 37, "span": [0, 230], "__ref_s3_data": null}]}, {"text": "In the case of a row permission, the rule text is the \"test\" of whether the user can access the row. If the test result is true, the row can be accessed. If the test result is false, the row essentially does not exist for the user. 
From a set-at-a-time perspective, the permission defines which rows can be part of the query result set, and which rows cannot.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0677032470703, 578.1064453125, 545.6567993164062, 623.906982421875], "page": 37, "span": [0, 359], "__ref_s3_data": null}]}, {"text": "In the case of a column mask, the rule text is both the test of whether the user can see the actual column value, and it is the masking logic if the user cannot have access to actual column value.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2191619873047, 532.300048828125, 537.8551025390625, 566.6549072265625], "page": 37, "span": [0, 196], "__ref_s3_data": null}]}, {"text": "For a simple example of implementing row permissions and column masks, see 3.6, \"Human resources example\" on page 22.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.39344787597656, 498.2806396484375, 547.2691040039062, 520.6704711914062], "page": 37, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "In general, almost any set-based, relational logic is valid. For the row permission, the search condition follows the same rules that are used by the search condition in a WHERE clause.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1230926513672, 464.1593933105469, 545.9476928710938, 485.88037109375], "page": 37, "span": [0, 185], "__ref_s3_data": null}]}, {"text": "For the column mask, the logic follows the same rules as the CASE expression. The result data type, length, null attribute, and CCSID of the CASE expression must be compatible with the data type of the column. If the column does not allow the null value, the result of the CASE expression cannot be the NULL value. 
The application or interface making the data access request is expecting that all of the column attributes and values are consistent with the original definition, regardless of any masking.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1824188232422, 382.2794494628906, 547.2494506835938, 452.6309509277344], "page": 37, "span": [0, 504], "__ref_s3_data": null}]}, {"text": "For more information about what is permitted, see the \"Database programming\" topic of the IBM i 7.2 Knowledge Center, found at:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.42941284179688, 348.2829895019531, 542.0492553710938, 370.6573181152344], "page": 37, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?lang =en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.240966796875, 319.5921936035156, 546.5352172851562, 341.3039855957031], "page": 37, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "One of the first tasks in either the row permission or the column mask logic is to determine who the user is, and whether they have access to the data. Elegant methods to establish the identity and attributes of the user can be employed by using the special registers, global variables, and the VERIFY function. After the user's identity is established, it is a simple matter of allowing or disallowing access by using true or false testing. The examples that are included in this paper demonstrate some of the more common and obvious techniques.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0011749267578, 237.2847442626953, 547.0272216796875, 307.4244079589844], "page": 37, "span": [0, 546], "__ref_s3_data": null}]}, {"text": "More sophisticated methods can employ existential, day of year / time of day, and relational comparisons with set operations. 
For example, you can use a date master or date dimension table to determine whether the current date is a normal business day. If the current date is a valid business day, then access is allowed. If the current date is not a business day (for example a weekend day or holiday), access is denied. This test can be accomplished by performing a lookup using a subquery, such as the one that is shown in Example 3-1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.90562438964844, 155.11756896972656, 547.3748779296875, 225.27194213867188], "page": 37, "span": [0, 538], "__ref_s3_data": null}]}, {"text": "Example 3-1 Subquery that is used as part of the rule", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.77896118164062, 133.42030334472656, 355.3341369628906, 143.4773406982422], "page": 37, "span": [0, 53], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/10"}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.5984191894531, 27.918827056884766, 523.6016235351562, 37.22893142700195], "page": 37, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "21", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4708862304688, 27.93828010559082, 547.2591552734375, 37.50567626953125], "page": 37, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "22", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.00093841552734, 27.93828010559082, 78.4020004272461, 37.64419937133789], "page": 38, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.40680694580078, 27.702280044555664, 334.4214172363281, 37.302345275878906], "page": 38, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Given that joins and subqueries can be used 
to perform set-based operations against existing data that is housed in other objects, almost any relational test can be constructed. If the data in the objects is manipulated over time, the RCAC test logic (and user query results) can be changed without modifying the actual row permission or column mask. This includes moving a user from one group to another or changing a column value that is used to allow or disallow access. For example, if Saturday is now a valid business day, only the BUSINESS_DAY value in the DATE_MASTER must be updated, not the permission logic. This technique can potentially avoid downtime because of the exclusive lock that is needed on the table when adding or changing RCAC definitions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.98097229003906, 614.8680419921875, 547.2915649414062, 721.5286254882812], "page": 38, "span": [0, 763], "__ref_s3_data": null}]}, {"text": "3.5 SELECT, INSERT, and UPDATE behavior with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.369140625, 572.0363159179688, 486.3243713378906, 587.82958984375], "page": 38, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "RCAC provides a database-centric approach to determining which rows can be accessed and what column values can be seen by a specific user. Given that the control is handled by DB2 internally, every data manipulation statement is under the influence of RCAC, with no exceptions. When accessing the table, the SELECT statements, searched UPDATE statements, and searched DELETE statements implicitly and transparently contain the row permission and the column mask rule text. 
This means that the data set can be logically restricted and reduced on a user by user basis.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89967346191406, 473.217529296875, 547.2974243164062, 555.4274291992188], "page": 38, "span": [0, 566], "__ref_s3_data": null}]}, {"text": "Furthermore, DB2 prevents an INSERT statement from inserting a row or an UPDATE statement from modifying a row such that the current user cannot be permitted to access it. You cannot create a situation in which the data you inserted or changed is no longer accessible to you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.10336303710938, 427.2479248046875, 547.1958618164062, 461.4225158691406], "page": 38, "span": [0, 275], "__ref_s3_data": null}]}, {"text": "For more information and considerations about data movement in an RCAC environment, see Chapter 6, \"Additional considerations\" on page 85.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.00917053222656, 393.11602783203125, 547.2606811523438, 415.4470520019531], "page": 38, "span": [0, 138], "__ref_s3_data": null}]}, {"text": "Note: DB2 does not provide any indication back to the user that the data set requested was restricted or reduced by RCAC. This is by design, as it helps minimize any changes to the applications accessing the data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.0465545654297, 340.999267578125, 541.2198486328125, 375.3176574707031], "page": 38, "span": [0, 213], "__ref_s3_data": null}]}, {"text": "3.6 Human resources example", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.39986419677734, 293.036376953125, 298.8533935546875, 308.852783203125], "page": 38, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "This section illustrates with a simple example the usage of RCAC on a typical Human Resources application (schema). 
In this sample Human Resources schema, there is an important table that is called EMPLOYEES that contains all the information that is related to the employees of the company. Among the information that normally is stored in the EMPLOYEES table, there is some sensitive information that must be hidden from certain users:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.86647033691406, 206.2593536376953, 542.829345703125, 276.0174255371094], "page": 38, "span": [0, 436], "__ref_s3_data": null}]}, {"text": "GLYPH Tax_Id information", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.812744140625, 189.279541015625, 235.38601684570312, 199.46461486816406], "page": 38, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "GLYPH YEAR of the birth date of the employee (hiding the age of the employee)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6238250732422, 177.03131103515625, 470.03765869140625, 187.34808349609375], "page": 38, "span": [0, 87], "__ref_s3_data": null}]}, {"text": "In this example, there are four different types of users:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.17919921875, 154.9170379638672, 375.29803466796875, 165.3710174560547], "page": 38, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "GLYPH Employees", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6767120361328, 138.28036499023438, 200.1146697998047, 148.322509765625], "page": 38, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "GLYPH Managers", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.50352478027344, 125.92362976074219, 195.63865661621094, 136.71217346191406], "page": 38, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "GLYPH Human Resources Manager", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.46554565429688, 114.28074645996094, 276.60760498046875, 
124.81513977050781], "page": 38, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "GLYPH Consultant/IT Database Engineer (In this example, this person is an external consultant that is not an employee of the company.)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.2389678955078, 90.28113555908203, 539.58447265625, 112.66990661621094], "page": 38, "span": [0, 144], "__ref_s3_data": null}]}, {"text": "The following sections describe step-by-step what is needed to be done to implement RCAC in this environment.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7884979248047, 56.261756896972656, 546.5243530273438, 77.96189880371094], "page": 38, "span": [0, 109], "__ref_s3_data": null}]}, {"text": "3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.26187133789062, 708.1668701171875, 500.5502014160156, 721.559814453125], "page": 39, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "The consultant must have authority to implement RCAC, so you must use one of the function IDs that are provided in DB2 for i (see 2.1.5, \"Security Administrator function: QIBM_DB_SECADM\" on page 9). Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6376190185547, 660.9580688476562, 547.2426147460938, 695.4973754882812], "page": 39, "span": [0, 228], "__ref_s3_data": null}]}, {"text": "1. Run the Change Functional Usage ( CHGFCNUSG ) CL commands that are shown in Example 3-2. 
These commands must be run by someone that has the *SECOFR authority.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 619.9730834960938, 510.97723388671875, 654.0428466796875], "page": 39, "span": [0, 161], "__ref_s3_data": null}]}, {"text": "Example 3-2 Function ID required to implement RCAC", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 598.985107421875, 358.47119140625, 608.8162841796875], "page": 39, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED) CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(MCAIN) USAGE(*ALLOWED)", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.4434051513672, 568.8560791015625, 441.59588623046875, 591.7393798828125], "page": 39, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "2. There is a way to discover which user profiles have authorization to implement RCAC. This can be done by running the SQL statement that is shown in Example 3-3.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.14793395996094, 533.0126953125, 547.2882080078125, 555.4136962890625], "page": 39, "span": [0, 163], "__ref_s3_data": null}]}, {"text": "Example 3-3 Verifying what user profiles have authorization to implement RCAC", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.76101684570312, 511.7995300292969, 460.1641540527344, 521.465576171875], "page": 39, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "SELECT function_id, user_name, usage, user_type FROM qsys2.function_usage WHERE function_id ='QIBM_DB_SECADM' ORDER BY user_name;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.20632934570312, 420.8342590332031, 346.6770935058594, 505.3314208984375], "page": 39, "span": [0, 129], "__ref_s3_data": null}]}, {"text": "3. The result of the SQL statement is shown in Figure 3-6. 
In this example, either MCAIN or HBEDOYA can implement RCAC in the Human Resources database.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.12759399414062, 386.04730224609375, 545.5682983398438, 408.11676025390625], "page": 39, "span": [0, 151], "__ref_s3_data": null}]}, {"text": "Figure 3-6 Result of the function ID query", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.321044921875, 314.7570495605469, 307.5635681152344, 324.75347900390625], "page": 39, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "3.6.2 Creating group profiles for the users and their roles", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.15365600585938, 282.8797912597656, 418.56524658203125, 296.369140625], "page": 39, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "Assuming that all the employees have a valid user profile, the next step is to create group profiles to group the employees. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.047607421875, 247.4474334716797, 532.9351806640625, 269.863525390625], "page": 39, "span": [0, 154], "__ref_s3_data": null}]}, {"text": "1. 
In this example, there are three group profiles:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 230.03945922851562, 357.9049987792969, 240.60972595214844], "page": 39, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "-HR (Human Resource personnel)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.1498565673828, 213.60145568847656, 313.8529357910156, 223.9517364501953], "page": 39, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "-MGR (Managers)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.3465118408203, 201.096923828125, 242.8311767578125, 212.0452117919922], "page": 39, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "-EMP (Employees)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.22793579101562, 189.64846801757812, 245.60702514648438, 199.87611389160156], "page": 39, "span": [0, 16], "__ref_s3_data": null}]}, {"text": "These are created by creating user profiles with no password. 
Example 3-4 shows the Create User Profile ( CRTUSRPRF ) CL commands that you use to create these group profiles.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.51315307617188, 160.13287353515625, 547.29541015625, 182.80564880371094], "page": 39, "span": [0, 174], "__ref_s3_data": null}]}, {"text": "Example 3-4 Creating group profiles", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 138.42959594726562, 286.0830383300781, 148.75363159179688], "page": 39, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "CRTUSRPRF USRPRF(EMP) PASSWORD() TEXT('Employees Group') CRTUSRPRF USRPRF(MGR) PASSWORD() TEXT('Managers Group') CRTUSRPRF USRPRF(HR) PASSWORD() TEXT('Human Resources Group')", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.74122619628906, 93.17837524414062, 547.5665893554688, 138.2897186279297], "page": 39, "span": [0, 174], "__ref_s3_data": null}]}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.6204833984375, 27.91693878173828, 523.6016235351562, 37.26707458496094], "page": 39, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "23", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5922241210938, 27.93828010559082, 547.2591552734375, 37.411338806152344], "page": 39, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "24", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.0204849243164, 27.93828010559082, 78.4020004272461, 37.76950454711914], "page": 40, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.37641143798828, 27.63180923461914, 334.4214172363281, 37.37206268310547], "page": 40, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. 
You now must assign users to a group profile. Employees go in to the EMP group profile, Managers go into the MGR group profile, and Human Resource employees go into the HR group profile. For simplicity, this example selects one employee (DSSMITH), one manager (TQSPENSER), and one HR analyst (VGLUCCHESS).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3994903564453, 674.6657104492188, 547.2675170898438, 721.529296875], "page": 40, "span": [0, 308], "__ref_s3_data": null}]}, {"text": "Note: Neither of the consultants (MCAIN and HBEDOYA) belong to any group profile.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [156.58865356445312, 646.8115234375, 533.43896484375, 657.5706176757812], "page": 40, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "3.6.3 Demonstrating data access without RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.1897964477539, 609.5682983398438, 360.1609802246094, 622.9415283203125], "page": 40, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "Before implementing RCAC, run some simple SQL statements to demonstrate data access without RCAC. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1055908203125, 574.2984619140625, 540.3065185546875, 596.710693359375], "page": 40, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "1. 
The first SQL statement, which is shown in Example 3-5, basically counts the total number of rows in the EMPLOYEES table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 545.2590942382812, 547.2156982421875, 567.3441162109375], "page": 40, "span": [0, 124], "__ref_s3_data": null}]}, {"text": "Example 3-5 Counting the number of employees", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.759765625, 523.2703247070312, 334.7440490722656, 533.2840576171875], "page": 40, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.2035675048828, 505.5345458984375, 406.6163635253906, 516.2594604492188], "page": 40, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "The result of this query is shown in Figure 3-7, which is the total number of employees of the company.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.52401733398438, 469.4679260253906, 545.1071166992188, 492.073974609375], "page": 40, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "Figure 3-7 Number of employees", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.25228881835938, 407.0002746582031, 272.8937683105469, 416.7295227050781], "page": 40, "span": [0, 30], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/26"}, {"text": "2. Run a second SQL statement (shown in Example 3-6) that lists the employees. 
If you have read access to the table, you see all the rows no matter who you are.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.22938537597656, 368.23065185546875, 547.2517700195312, 390.6070861816406], "page": 40, "span": [0, 160], "__ref_s3_data": null}]}, {"text": "Example 3-6 Displaying the information of the Employees", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 347.4570007324219, 369.31500244140625, 355.7820129394531], "page": 40, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.8000030517578, 245.0392303466797, 286.67803955078125, 340.6308898925781], "page": 40, "span": [0, 124], "__ref_s3_data": null}]}, {"text": "The result of this query is shown in Figure 3-8.", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [150.65087890625, 710.772705078125, 356.252197265625, 721.2792358398438], "page": 41, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "Figure 3-8 List of employees without RCAC enabled", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.38050079345703, 311.7666931152344, 276.68267822265625, 321.1317138671875], "page": 41, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "3.6.4 Defining and creating row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3528823852539, 278.95111083984375, 339.9589538574219, 292.13372802734375], "page": 41, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "Implement RCAC on the EMPLOYEES table by completing the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.41700744628906, 255.6973419189453, 484.33428955078125, 266.0401916503906], "page": 41, "span": [0, 72], "__ref_s3_data": 
null}]}, {"text": "1. Start by defining a row permission. In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80001831054688, 226.56398010253906, 519.3287963867188, 249.3506622314453], "page": 41, "span": [0, 104], "__ref_s3_data": null}]}, {"text": "-Human Resources employees can see all the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.22027587890625, 209.7350616455078, 392.5151062011719, 219.887451171875], "page": 41, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "-Managers can see only information for the employees that they manage.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.24195861816406, 197.71392822265625, 484.94476318359375, 207.83270263671875], "page": 41, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "-Employees can see only their own information.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.22813415527344, 185.97039794921875, 371.5732421875, 195.80975341796875], "page": 41, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "-Consultants are not allowed to see any rows in the table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.13531494140625, 173.5364532470703, 415.18304443359375, 183.9356689453125], "page": 41, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.7210693359375, 27.904325485229492, 523.6016235351562, 37.22553253173828], "page": 41, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "25", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5884399414062, 27.93828010559082, 547.2591552734375, 37.458621978759766], "page": 41, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "26", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.911659240722656, 27.93828010559082, 78.4020004272461, 37.578487396240234], "page": 42, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.68279266357422, 334.4214172363281, 37.28973388671875], "page": 42, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "To implement this row permission, run the SQL statement that is shown in Example 3-7.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.37896728515625, 710.8544311523438, 538.560302734375, 721.4290161132812], "page": 42, "span": [0, 85], "__ref_s3_data": null}]}, {"text": "Example 3-7 Creating a permission for the EMPLOYEE table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.6933135986328, 689.595703125, 383.918701171875, 699.4521484375], "page": 42, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "CREATE PERMISSION HR_SCHEMA.PERMISSION1_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR ROWS WHERE ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND ( EMPLOYEES . MANAGER_OF_EMPLOYEE = SESSION_USER OR EMPLOYEES . USER_ID = SESSION_USER ) ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 AND EMPLOYEES . 
USER_ID = SESSION_USER ) ENFORCED FOR ALL ACCESS ENABLE ;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [134.9765625, 547.642822265625, 547.2913818359375, 688.2454833984375], "page": 42, "span": [0, 438], "__ref_s3_data": null}]}, {"text": "2. Look at the definition of the table and see the permissions, as shown in Figure 3-9. QIBM_DEFAULT_EMPLOYEE_HR_SCHEMA is the default permission, as described in 3.1.2, \"Enabling and activating RCAC\" on page 16.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.09703063964844, 504.1363220214844, 539.8582153320312, 538.7603759765625], "page": 42, "span": [0, 212], "__ref_s3_data": null}]}, {"text": "Figure 3-9 Row permissions that are shown in System i Navigator", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.4061508178711, 291.1373596191406, 331.3226013183594, 300.8897705078125], "page": 42, "span": [0, 63], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/27"}, {"text": "3.6.5 Defining and creating column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.20075988769531, 258.3492431640625, 327.4058837890625, 271.5242919921875], "page": 42, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "Define the different masks for the columns that are sensitive by completing the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.30203247070312, 222.73190307617188, 526.414306640625, 245.54981994628906], "page": 42, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "1. Start with the DAY_OF_BIRTH column. 
In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 194.2399139404297, 538.78564453125, 216.82652282714844], "page": 42, "span": [0, 104], "__ref_s3_data": null}]}, {"text": "-Human Resources can see the entire date of birth of the employees.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.34336853027344, 177.2699432373047, 467.65625, 187.3153839111328], "page": 42, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "-Employees can see only their own date of birth.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.10040283203125, 160.46939086914062, 375.3867492675781, 170.65682983398438], "page": 42, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "-Managers can see the date of birth of their employees masked with YEAR being 9999.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.22607421875, 142.966064453125, 547.2565307617188, 153.288818359375], "page": 42, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "To implement this column mask, run the SQL statement that is shown in Example 3-8.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.5434112548828, 126.48641204833984, 530.0606689453125, 136.5352020263672], "page": 42, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "Example 3-8 Creation of a mask on the DATE_OF_BIRTH column", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.55294799804688, 105.13160705566406, 404.0565185546875, 114.82978820800781], "page": 42, "span": [0, 58], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/11"}, {"text": "RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . 
DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.0765838623047, 529.9927368164062, 523.3837280273438, 720.341552734375], "page": 43, "span": [0, 449], "__ref_s3_data": null}]}, {"text": "2. The other column to mask in this example is the TAX_ID information. In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.99525451660156, 495.1485595703125, 547.2122192382812, 517.6535034179688], "page": 43, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "-Human Resources can see the unmasked TAX_ID of the employees.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.24124145507812, 478.3014831542969, 469.1528015136719, 488.7835998535156], "page": 43, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "-Employees can see only their own unmasked TAX_ID.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.32850646972656, 461.2357177734375, 403.98541259765625, 471.3717956542969], "page": 43, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "-Managers see a masked version of TAX_ID with the first five characters replaced with the X character (for example, XXX-XX-1234).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.30641174316406, 432.28228759765625, 545.16845703125, 454.51611328125], "page": 43, "span": [0, 129], "__ref_s3_data": null}]}, {"text": "-Any other person sees the entire TAX_ID as masked, for example, XXX-XX-XXXX.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.07168579101562, 414.8314208984375, 529.463623046875, 425.31011962890625], "page": 43, "span": [0, 77], "__ref_s3_data": null}]}, 
{"text": "To implement this column mask, run the SQL statement that is shown in Example 3-9.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.67160034179688, 397.85516357421875, 530.060302734375, 407.97796630859375], "page": 43, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "Example 3-9 Creating a mask on the TAX_ID column", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.63172912597656, 377.08172607421875, 352.5632629394531, 386.8071594238281], "page": 43, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "CREATE MASK HR_SCHEMA.MASK_TAX_ID_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR COLUMN TAX_ID RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( EMPLOYEES . TAX_ID , 8 , 4 ) ) WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 THEN EMPLOYEES . TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.53025817871094, 104.04078674316406, 545.3026123046875, 374.6419372558594], "page": 43, "span": [0, 590], "__ref_s3_data": null}]}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.6368103027344, 27.94813346862793, 523.6016235351562, 37.276817321777344], "page": 43, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "27", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.412353515625, 27.93828010559082, 547.2591552734375, 37.551448822021484], "page": 43, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "3. 
Figure 3-10 shows the masks that are created in the HR_SCHEMA.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.16213989257812, 710.7819213867188, 449.952392578125, 721.4285278320312], "page": 44, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "Figure 3-10 Column masks shown in System i Navigator", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.32794189453125, 609.9722290039062, 294.5016784667969, 619.325927734375], "page": 44, "span": [0, 52], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/28"}, {"text": "3.6.6 Activating RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.31277465820312, 576.8663940429688, 203.98521423339844, 590.3197021484375], "page": 44, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "Now that you have created the row permission and the two column masks, RCAC must be activated. The row permission and the two column masks are enabled (last clause in the scripts), but now you must activate RCAC on the table. To do so, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.171875, 529.45751953125, 547.2256469726562, 563.8794555664062], "page": 44, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "1. 
Run the SQL statements that are shown in Example 3-10.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.7984619140625, 513.2022094726562, 409.4788818359375, 523.4619750976562], "page": 44, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "Example 3-10 Activating RCAC on the EMPLOYEES table", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.4545135498047, 491.461669921875, 375.2909851074219, 501.6716613769531], "page": 44, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "/* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.58570861816406, 426.5678405761719, 376.8215637207031, 485.00579833984375], "page": 44, "span": [0, 180], "__ref_s3_data": null}]}, {"text": "2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. 
To do this, from the main navigation pane of System i Navigator, click Schemas \uf0ae HR_SCHEMA \uf0ae Tables , right-click the EMPLOYEES table, and click Definition .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.21578979492188, 378.27978515625, 540.8014526367188, 412.595458984375], "page": 44, "span": [0, 231], "__ref_s3_data": null}]}, {"text": "Figure 3-11 Selecting the EMPLOYEES table from System i Navigator", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.287353515625, 134.46551513671875, 348.3514404296875, 144.03317260742188], "page": 44, "span": [0, 65], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/29"}, {"text": "28", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.94425964355469, 27.93828010559082, 78.4020004272461, 37.58649444580078], "page": 44, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39510345458984, 27.71799087524414, 334.4214172363281, 37.344871520996094], "page": 44, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "3. The EMPLOYEES table definition is displayed, as shown in Figure 3-12. 
Note that the Row access control and Column access control options are checked.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1829071044922, 699.0615844726562, 531.1966552734375, 721.4234619140625], "page": 45, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "Figure 3-12 RCAC enabled on the EMPLOYEES table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.24822998046875, 441.8091125488281, 356.59588623046875, 451.58001708984375], "page": 45, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/30"}, {"text": "3.6.7 Demonstrating data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.26148223876953, 409.7285461425781, 340.0064392089844, 422.9130554199219], "page": 45, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "You are now ready to start testing RCAC with the four different users. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89842224121094, 374.4912109375, 547.259521484375, 396.6586608886719], "page": 45, "span": [0, 100], "__ref_s3_data": null}]}, {"text": "1. The first SQL statement that is shown in Example 3-11 illustrates the EMPLOYEE count. 
You know that there are 42 rows from the query that was run before RCAC was put in place (see 3.6.3, \"Demonstrating data access without RCAC\" on page 24).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 332.77825927734375, 544.1072387695312, 367.4451599121094], "page": 45, "span": [0, 243], "__ref_s3_data": null}]}, {"text": "Example 3-11 EMPLOYEES count", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 312.35699462890625, 279.0827941894531, 320.6820068359375], "page": 45, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 294.8070983886719, 406.6163635253906, 303.5818786621094], "page": 45, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. The result of the query for a user that belongs to the HR group profile is shown in Figure 3-13. This user can see all the 42 rows (employees).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2246551513672, 258.3946533203125, 511.5380859375, 280.7793273925781], "page": 45, "span": [0, 146], "__ref_s3_data": null}]}, {"text": "Figure 3-13 Count of EMPLOYEES by HR", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.48751831054688, 196.33412170410156, 309.8641662597656, 206.0880889892578], "page": 45, "span": [0, 36], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/31"}, {"text": "3. The result of the same query for a user who is logged on as TQSPENSER (Manager) is shown in Figure 3-14. 
TQSPENSER has five employees in his department and he can also see his own row, which is why the count is 6.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1068572998047, 145.77272033691406, 540.7218627929688, 180.23095703125], "page": 45, "span": [0, 216], "__ref_s3_data": null}]}, {"text": "Figure 3-14 Count of EMPLOYEES by a manager", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0082244873047, 84.47093200683594, 340.1214904785156, 94.05392456054688], "page": 45, "span": [0, 43], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/32"}, {"text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.5658264160156, 27.9404296875, 523.6016235351562, 37.236785888671875], "page": 45, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "29", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5850830078125, 27.93828010559082, 547.2591552734375, 37.54125213623047], "page": 45, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "30", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.2293701171875, 27.93828010559082, 78.4020004272461, 37.64487838745117], "page": 46, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41975402832031, 27.68392562866211, 334.4214172363281, 37.348270416259766], "page": 46, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "4. The result of the same query that is run by an employee (DSSMITH) gives the result that is shown in Figure 3-15. 
Each employee can see only his or her own data (row).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.95506286621094, 698.8024291992188, 547.213623046875, 721.3870849609375], "page": 46, "span": [0, 169], "__ref_s3_data": null}]}, {"text": "Figure 3-15 Count of EMPLOYEES by an employee", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09197998046875, 637.8728637695312, 347.52752685546875, 647.5279541015625], "page": 46, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/33"}, {"text": "5. The result of the same query that is run by the Consultant/DBE gives the result that is shown in Figure 3-16. The consultants/DBE can manage and implement RCAC, but they do not see any rows at all.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.4210205078125, 586.8585205078125, 543.9885864257812, 621.4335327148438], "page": 46, "span": [0, 200], "__ref_s3_data": null}]}, {"text": "Figure 3-16 Count of EMPLOYEES by a consultant", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.2426300048828, 525.92822265625, 345.4479675292969, 535.4974365234375], "page": 46, "span": [0, 46], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/34"}, {"text": "Does the result make sense? Yes, it does because RCAC is enabled.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.1999969482422, 499.5382995605469, 456.2101135253906, 509.35888671875], "page": 46, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "6. Run queries against the EMPLOYEES table. The query that is used in this example runs and tests with the four different user profiles and is the same query that was run in 3.6.3, \"Demonstrating data access without RCAC\" on page 24. 
It is shown in Example 3-12.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.42527770996094, 458.2580261230469, 544.2874145507812, 492.674072265625], "page": 46, "span": [0, 262], "__ref_s3_data": null}]}, {"text": "Example 3-12 SELECT statement to test with the different users", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 437.3970031738281, 396.18621826171875, 445.7220153808594], "page": 46, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.8000030517578, 334.8448181152344, 266.6982727050781, 429.0736999511719], "page": 46, "span": [0, 124], "__ref_s3_data": null}]}, {"text": "7. Figure 3-17 shows the results of the query for a Human Resources (VGLUCCHESS) user profile. The user can see all the rows and all the columns.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.98915100097656, 699.0184936523438, 547.152587890625, 721.4612426757812], "page": 47, "span": [0, 145], "__ref_s3_data": null}]}, {"text": "Figure 3-17 SQL statement result by Human Resources user profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.40335845947266, 295.6847839355469, 338.4682312011719, 305.3370666503906], "page": 47, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "8. Figure 3-18 shows the results of the same query for the Manager (TQSPENSER). 
Notice the masking of the DATE_OF_BIRTH and TAX_ID columns.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.15513610839844, 256.77691650390625, 546.0484008789062, 279.5661315917969], "page": 47, "span": [0, 139], "__ref_s3_data": null}]}, {"text": "Figure 3-18 SQL statement result by Manager profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.69406127929688, 164.74615478515625, 279.8969421386719, 174.571044921875], "page": 47, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "9. Figure 3-19 shows the results of the same query for an employee (DSSMITH). The employee can only see only his own data with no masking at all.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2742919921875, 126.19644927978516, 518.9005737304688, 148.72137451171875], "page": 47, "span": [0, 145], "__ref_s3_data": null}]}, {"text": "Figure 3-19 SQL statement result by an employee profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.48681640625, 76.97213745117188, 295.5399169921875, 86.59312438964844], "page": 47, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.598876953125, 27.90372085571289, 523.6016235351562, 37.23920440673828], "page": 47, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "31", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6412963867188, 27.93828010559082, 547.2591552734375, 37.40355682373047], "page": 47, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "32", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.32125091552734, 27.93828010559082, 78.4020004272461, 37.604923248291016], "page": 48, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.700542449951172, 334.4214172363281, 37.34117126464844], "page": 48, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "10.Figure 3-20 shows the results of the same query for the Consultant/DBE, who is not one of the company's employees.", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.8000030517578, 699.1849975585938, 547.2752685546875, 721.3404541015625], "page": 48, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "Figure 3-20 SQL statement result by Consultant/DBE profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.52674102783203, 617.0689697265625, 307.95556640625, 626.601806640625], "page": 48, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "3.6.8 Demonstrating data access with a view and RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.2658462524414, 585.1321411132812, 409.0855407714844, 598.2813110351562], "page": 48, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "This section covers data access with a view and RCAC. 
Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6865692138672, 561.75830078125, 515.0767822265625, 572.1253662109375], "page": 48, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "1. The EMPLOYEES table has a column that is called On_Leave_Flag (Figure 3-21 on page 33) indicating that the employee is on Leave of Absence. For this purpose, a view is created that lists only the employees that are on leave.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.65882873535156, 520.366943359375, 547.2307739257812, 554.7566528320312], "page": 48, "span": [0, 227], "__ref_s3_data": null}]}, {"text": "Figure 3-21 Employees on leave", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.60844421386719, 215.6061248779297, 198.87405395507812, 225.38070678710938], "page": 49, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "2. Example 3-13 shows the definition of the view.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.15798950195312, 188.87811279296875, 355.6940002441406, 199.41677856445312], "page": 49, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "Example 3-13 VIew of employees on leave", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 167.15521240234375, 311.72705078125, 177.46084594726562], "page": 49, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "CREATE VIEW HR_SCHEMA.EMPLOYEES_ON_LEAVE (EMPLOYEE_ID, FIRST_NAME, MIDDLE_INITIAL, LAST_NAME, WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH,", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 66.62857055664062, 426.59613037109375, 159.4019775390625], "page": 49, "span": [0, 159], "__ref_s3_data": null}]}, {"text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.6611328125, 27.86074447631836, 523.6016235351562, 37.31344985961914], "page": 49, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "33", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7655029296875, 27.93828010559082, 547.2591552734375, 37.37984085083008], "page": 49, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG )", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [258.8564758300781, 674.603759765625, 446.6356201171875, 720.6338500976562], "page": 50, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "AS SELECT EMPLOYEE_ID, FIRST_NAME , MIDDLE_INITIAL, LAST_NAME , WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG FROM HR_SCHEMA.EMPLOYEES WHERE ON_LEAVE_FLAG = 'Y';", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.84320068359375, 495.2419738769531, 271.8138427734375, 674.0155639648438], "page": 50, "span": [0, 233], "__ref_s3_data": null}]}, {"text": "3. Use the view to query the data and see who is on leave. 
The SQL statement that is used is shown in Example 3-14:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.26315307617188, 454.30218505859375, 547.3662109375, 476.8594665527344], "page": 50, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "Example 3-14 SQL statement for employees on leave", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 433.1369934082031, 353.97808837890625, 441.4620056152344], "page": 50, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES_ON_LEAVE;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.48304748535156, 330.2506103515625, 316.67755126953125, 426.0392761230469], "page": 50, "span": [0, 134], "__ref_s3_data": null}]}, {"text": "4. Start with the Human Resources person (VGLUCCHESS) and see what is the result of the previous query. He sees the two employees that are on leave and no masking is done over the DATE_OF_BIRTH and TAX_ID columns. The results of the query are shown in Figure 3-22.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.9942169189453, 270.6485595703125, 547.2506713867188, 317.7777404785156], "page": 50, "span": [0, 264], "__ref_s3_data": null}]}, {"text": "Figure 3-22 Employees on leave - Human Resources user", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.3441390991211, 212.0773162841797, 302.135009765625, 221.99578857421875], "page": 50, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "5. Figure 3-23 shows what the Manager (TQSPENSER) gets when he runs the same query over the view. He sees only the employees that are on leave that are managed by him. In this example, it is one employee. 
The columns are masked, which confirms that RCAC is applied to the view as well.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1876220703125, 149.55426025390625, 546.509521484375, 196.12924194335938], "page": 50, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "Figure 3-23 Employee on leave - Manager of Field Reps user", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.43692779541016, 98.1999740600586, 314.1068115234375, 108.11901092529297], "page": 50, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "34", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.26634979248047, 27.93828010559082, 78.4020004272461, 37.68272018432617], "page": 50, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.34017944335938, 27.696313858032227, 334.4214172363281, 37.3791618347168], "page": 50, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "6. Figure 3-24 shows what the employee (DSSMITH) gets when he runs the same query over the view. The employee gets an empty set or he gets only himself if he is on leave.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.16725158691406, 698.8943481445312, 536.1886596679688, 721.2736206054688], "page": 51, "span": [0, 170], "__ref_s3_data": null}]}, {"text": ".", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.79974365234375, 684.2783813476562, 67.5686264038086, 693.4913940429688], "page": 51, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Figure 3-24 Employees on leave - employee user", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.52584838867188, 626.8951416015625, 265.8390808105469, 636.5194702148438], "page": 51, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [344.75775146484375, 27.914899826049805, 523.6016235351562, 37.27613830566406], "page": 51, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "35", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7324829101562, 27.93828010559082, 547.2591552734375, 37.47168731689453], "page": 51, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "36", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.37136840820312, 27.93828010559082, 78.4020004272461, 37.52334976196289], "page": 52, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.71300506591797, 334.534423828125, 37.346683502197266], "page": 52, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/35"}, {"text": "Chapter 4.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 53, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "4", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 699.2093505859375], "page": 53, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Implementing Row and Column Access Control: Banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.44911193847656, 451.1016845703125, 532.0337524414062, 538.6605224609375], "page": 53, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "This chapter illustrates the Row and Column Access Control (RCAC) concepts using a banking example. 
Appendix A, \"Database definitions for the RCAC banking example\" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91500854492188, 367.058837890625, 546.1500854492188, 413.4091491699219], "page": 53, "span": [0, 289], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.71763610839844, 344.9483642578125, 347.4121398925781, 355.50213623046875], "page": 53, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Business requirements for the RCAC banking scenario", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.78599548339844, 327.7929992675781, 393.0647888183594, 338.13604736328125], "page": 53, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "GLYPH Description of the users roles and responsibilities", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.63722229003906, 315.72662353515625, 368.54632568359375, 326.46844482421875], "page": 53, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "GLYPH Implementation of RCAC", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69590759277344, 303.98077392578125, 261.51287841796875, 314.0971984863281], "page": 53, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.962242126464844, 27.78060531616211, 257.24334716796875, 37.377197265625], "page": 53, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "37", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5948486328125, 27.93828010559082, 547.2591552734375, 37.74201202392578], "page": 53, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4.1 Business requirements for the RCAC banking scenario", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.24122619628906, 702.5103149414062, 512.5513916015625, 718.264892578125], "page": 54, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "As part of a new internet banking project, the Bank decides to raise the level of data access control on the following three tables that are involved in the new customer-facing application:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84820556640625, 663.7042236328125, 543.9003295898438, 685.9572143554688], "page": 54, "span": [0, 189], "__ref_s3_data": null}]}, {"text": "GLYPH CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.72608947753906, 647.138916015625, 214.6067352294922, 657.6016845703125], "page": 54, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "GLYPH ACCOUNTS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6984405517578, 635.1390991210938, 206.64071655273438, 645.5955810546875], "page": 54, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "GLYPH TRANSACTIONS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5414581298828, 623.1392822265625, 229.18223571777344, 633.65625], "page": 54, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "RCAC will be used to restrict access to the rows in these three tables by using permissions, and to restrict column values by using 
masks. The default position is that no user can access the rows in the tables. From there, specific bank employees are allowed access only to the rows for their job responsibilities. In addition, columns containing personal or sensitive data are masked appropriately. Bank customers are allowed access to only their rows and column values.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95913696289062, 541.180419921875, 547.2745971679688, 611.2424926757812], "page": 54, "span": [0, 471], "__ref_s3_data": null}]}, {"text": "In this example, it is assumed that the Bank employees have access to the tables when working on the premises only. Employee access to data is provided by programs and tools using standard DB2 interfaces, such as embedded SQL, ODBC, JDBC, and CLI. The database connection authentication for these interfaces uses the employee's personal and unique IBM i user profile. Operating in their professional role, employees do not have access to bank data through the Internet.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9995574951172, 459.0948486328125, 546.8505859375, 529.453857421875], "page": 54, "span": [0, 469], "__ref_s3_data": null}]}, {"text": "Bank customers have access to their accounts and transactions by using a new web application. Each customer has unique credentials for logging in to the application. The authentication of the customer is handled by the web server. After the customer is authenticated, the web server establishes a connection to DB2 for data access. This connection uses a common IBM i user profile that is known as WEBUSER. This user profile is secured and is used only by the web application. 
No Bank employee has access to the WEBUSER profile, and no customer has an IBM i user profile.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01345825195312, 365.1432800292969, 547.270751953125, 447.330810546875], "page": 54, "span": [0, 571], "__ref_s3_data": null}]}, {"text": "The customer's identity is passed to DB2 by using a global variable. The global variable is secured and can be accessed only by the WEBUSER. The web application sets the CUSTOMER_LOGIN_ID variable to the customer's login value. This value is compared to the customer's login value that is found in the CUSTOMER_LOGIN_ID column of the CUSTOMERS table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03805541992188, 295.1842041015625, 547.2139892578125, 353.35540771484375], "page": 54, "span": [0, 350], "__ref_s3_data": null}]}, {"text": "Applications that do not use the web interface do not have to be changed because the global variable is NULL by default.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91661071777344, 261.1648254394531, 547.2429809570312, 283.2774963378906], "page": 54, "span": [0, 120], "__ref_s3_data": null}]}, {"text": "38", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.26496124267578, 27.93828010559082, 78.4020004272461, 37.504844665527344], "page": 54, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41310119628906, 27.687854766845703, 334.4214172363281, 37.32243728637695], "page": 54, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "A diagram of the internet banking architecture is shown in Figure 4-1:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6597900390625, 710.8218994140625, 442.2786865234375, 721.31201171875], "page": 55, "span": [0, 70], "__ref_s3_data": null}]}, {"text": 
"GLYPH The row permission and column masks for the CUSTOMERS table are based on the group of which the user profile is part. If the user is a customer, their specific login ID also is tested.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56939697265625, 670.2985229492188, 547.3546142578125, 704.505615234375], "page": 55, "span": [0, 200], "__ref_s3_data": null}]}, {"text": "GLYPH The row permission and column mask for the ACCOUNTS table are based on the CUSTOMERS table permission rules. A subquery is used to connect the accounts (child) with the customer (parent).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55992126464844, 628.9960327148438, 546.7332153320312, 663.4683227539062], "page": 55, "span": [0, 203], "__ref_s3_data": null}]}, {"text": "GLYPH The row permission for the TRANSACTIONS table is based on the ACCOUNTS table permission rules and the CUSTOMERS table permission rules. A subquery is used to connect the transactions (child) with the account (parent) and the account (child) with the customer (parent).", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.64488220214844, 576.1607055664062, 546.2234497070312, 622.9520874023438], "page": 55, "span": [0, 284], "__ref_s3_data": null}]}, {"text": "Figure 4-1 Internet banking example", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3312530517578, 293.15283203125, 286.0867919921875, 302.4555969238281], "page": 55, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/36"}, {"text": "4.2 Description of the users roles and responsibilities", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.38951110839844, 250.45684814453125, 475.6933898925781, 266.34906005859375], "page": 55, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "During the requirements gathering phase, the following groups of users are 
identified and codified:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03475952148438, 211.77857971191406, 533.134521484375, 234.10536193847656], "page": 55, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "GLYPH SECURITY: Security officer and security administrators", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5544891357422, 194.29025268554688, 395.1046142578125, 204.80311584472656], "page": 55, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "GLYPH DBE: Database engineers", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56268310546875, 182.1162872314453, 266.7660217285156, 192.73492431640625], "page": 55, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "GLYPH ADMIN: Bank business administrators", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.47203063964844, 170.73941040039062, 319.29107666015625, 180.6303253173828], "page": 55, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH TELLER: Bank tellers", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.59524536132812, 158.73960876464844, 246.766357421875, 168.96905517578125], "page": 55, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "GLYPH CUSTOMER: Bank customers using the internet", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.43141174316406, 146.72010803222656, 365.2953796386719, 156.56028747558594], "page": 55, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "GLYPH PUBLIC: Anyone not already in a group", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4724578857422, 134.33944702148438, 325.77801513671875, 144.67828369140625], "page": 55, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.38104248046875, 27.994510650634766, 523.5935668945312, 37.23670959472656], "page": 55, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "39", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6470336914062, 27.93828010559082, 547.2591552734375, 37.60462188720703], "page": 55, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "40", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.39681243896484, 27.93828010559082, 78.4020004272461, 37.67327880859375], "page": 56, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.661266326904297, 334.4214172363281, 37.33354187011719], "page": 56, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Based on their respective roles and responsibilities, the users (that is, a group) are controlled by row permissions and column masks. 
The chart that is shown in Figure 4-2 shows the rules for row and column access in this example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.121826171875, 687.2786865234375, 547.2127075195312, 721.305419921875], "page": 56, "span": [0, 231], "__ref_s3_data": null}]}, {"text": "Figure 4-2 Rules for row and column access", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3048095703125, 373.78326416015625, 317.6307067871094, 383.28173828125], "page": 56, "span": [0, 42], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/12"}, {"text": "The chart that is shown in Figure 4-3 shows the column access that is allowed by group and lists the column masks by table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89483642578125, 699.15673828125, 545.2960205078125, 721.2871704101562], "page": 57, "span": [0, 123], "__ref_s3_data": null}]}, {"text": "Figure 4-3 Column masks", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1103973388672, 381.31884765625, 245.0207977294922, 390.2972106933594], "page": 57, "span": [0, 23], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/13"}, {"text": "For the demonstration and testing of RCAC in this example, the following users interact with the database. Furthermore, the column masking rules are developed independently of the row permissions. If a person does not have permission to access the row, the column mask processing does not occur.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.20828247070312, 318.3379211425781, 543.4578247070312, 364.5528869628906], "page": 57, "span": [0, 295], "__ref_s3_data": null}]}, {"text": "GLYPH Hernando Bedoya is a DB2 for i database engineer with the user profile of HBEDOYA. 
He is part of the DBE group.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60292053222656, 289.3758239746094, 547.2935791015625, 312.2777404785156], "page": 57, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "GLYPH Mike Cain is a DB2 for i database engineer with the user profile of MCAIN. He is part of the DBE group.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6302490234375, 260.23626708984375, 538.9269409179688, 282.8565979003906], "page": 57, "span": [0, 119], "__ref_s3_data": null}]}, {"text": "GLYPH Veronica G. Lucchess is a bank account administrator with the user profile of VGLUCCHESS. She is part of the ADMIN group.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5699005126953, 231.489501953125, 492.53729248046875, 254.11923217773438], "page": 57, "span": [0, 137], "__ref_s3_data": null}]}, {"text": "GLYPH Tom Q. Spenser is a bank teller with the user profile of TQSPENSER. He is part of the TELLER group.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56982421875, 201.81741333007812, 534.6511840820312, 225.0314178466797], "page": 57, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "GLYPH The IT security officer has the user profile of SECURITY. She is not part of any group.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4237060546875, 185.3791046142578, 529.1213989257812, 196.2005157470703], "page": 57, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "GLYPH The online banking web application uses the user profile WEBUSER. This profile is part of the CUSTOMER group. Any future customer-facing applications can also use this group if needed.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4558868408203, 144.6416473388672, 547.3323364257812, 178.86013793945312], "page": 57, "span": [0, 200], "__ref_s3_data": null}]}, {"text": "GLYPH Adam O. 
Olsen is a bank customer with a web application login ID of KLD72CQR8JG.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.58990478515625, 127.630859375, 530.7957763671875, 138.04251098632812], "page": 57, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.45973205566406, 27.949493408203125, 523.5935668945312, 37.27492904663086], "page": 57, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "41", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5521850585938, 27.93828010559082, 547.2591552734375, 37.53369903564453], "page": 57, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4.3 Implementation of RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.40861511230469, 705.659912109375, 283.6307678222656, 721.7451171875], "page": 58, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "Figure 4-4 shows the data model of the banking scenario that is used in this example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.34254455566406, 679.0490112304688, 514.2452392578125, 689.4026489257812], "page": 58, "span": [0, 85], "__ref_s3_data": null}]}, {"text": "Figure 4-4 Data model of the banking scenario", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0010528564453, 478.4273681640625, 326.9934387207031, 488.0740966796875], "page": 58, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/37"}, {"text": "This section covers the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.00115966796875, 451.8724060058594, 309.19659423828125, 462.0606994628906], "page": 58, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "GLYPH Reviewing the tables that are used in this 
example", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69436645507812, 435.15850830078125, 372.9923095703125, 445.3446044921875], "page": 58, "span": [0, 66], "__ref_s3_data": null}]}, {"text": "GLYPH Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4688720703125, 422.6627502441406, 490.6497802734375, 433.2422180175781], "page": 58, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "GLYPH Creating group profiles for the users and their roles", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56883239746094, 410.80712890625, 376.5571594238281, 421.4914855957031], "page": 58, "span": [0, 69], "__ref_s3_data": null}]}, {"text": "GLYPH Creating the CUSTOMER_LOGIN_ID global variable", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65866088867188, 398.6716003417969, 384.3678283691406, 409.8951721191406], "page": 58, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "GLYPH Defining and creating row permissions", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.79405212402344, 386.6734313964844, 320.787353515625, 397.24853515625], "page": 58, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "GLYPH Defining and creating column masks", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57369995117188, 374.905517578125, 312.29620361328125, 385.2193908691406], "page": 58, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "GLYPH Restricting the inserting and updating of masked data", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5524444580078, 362.90753173828125, 387.68585205078125, 373.3866882324219], "page": 58, "span": [0, 69], "__ref_s3_data": null}]}, {"text": "GLYPH Activating row and column access control", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": 
[135.58221435546875, 351.0097351074219, 334.40216064453125, 361.2847900390625], "page": 58, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "GLYPH Reviewing row permissions", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.76596069335938, 338.75787353515625, 271.91436767578125, 349.0521240234375], "page": 58, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "GLYPH Demonstrating data access with RCAC", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.70089721679688, 327.16015625, 323.4725036621094, 337.5362854003906], "page": 58, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH Query implementation with RCAC activated", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4153289794922, 315.0921630859375, 343.3009948730469, 325.6691589355469], "page": 58, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "4.3.1 Reviewing the tables that are used in this example", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.28388214111328, 282.4974060058594, 410.4787292480469, 295.5218505859375], "page": 58, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "This section reviews the tables that are used in this example. As shown in Figure 4-5, there are three main tables that are involved in the data model: CUSTOMERS, ACCOUNTS, and TRANSACTIONS. 
There are 90 customers.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.78396606445312, 235.11878967285156, 541.1093139648438, 269.3280334472656], "page": 58, "span": [0, 214], "__ref_s3_data": null}]}, {"text": "Figure 4-5 Tables that are used in the banking example", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.12417602539062, 151.3236541748047, 360.94549560546875, 160.32456970214844], "page": 58, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Note: Appendix A, \"Database definitions for the RCAC banking example\" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.4352264404297, 89.66874694824219, 525.7510986328125, 124.07282257080078], "page": 58, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "42", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.37181854248047, 27.93828010559082, 78.4020004272461, 37.574485778808594], "page": 58, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.711872100830078, 334.43927001953125, 37.300533294677734], "page": 58, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "To review the attributes of each table that is used in this banking example, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89007568359375, 698.94091796875, 525.0703125, 721.3560791015625], "page": 59, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "1. Review the columns of each the tables through System i Navigator. 
Expand Database \uf0ae named Database \uf0ae Schemas \uf0ae BANK_SCHEMA \uf0ae Tables .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.79959106445312, 670.1790161132812, 543.2303466796875, 694.369873046875], "page": 59, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "2. Right-click the CUSTOMERS table and select Definition . Figure 4-6 shows the attributes for the CUSTOMERS table. The Row access control and Column access control options are not selected, which indicates that the table does not have RCAC implemented.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.16558837890625, 629.1010131835938, 546.8358764648438, 663.4356079101562], "page": 59, "span": [0, 253], "__ref_s3_data": null}]}, {"text": "Figure 4-6 CUSTOMERS table attributes", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.14613342285156, 418.0169982910156, 303.8804626464844, 427.2879943847656], "page": 59, "span": [0, 37], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/38"}, {"text": "3. Click the Columns tab to see the columns of the CUSTOMERS table, as shown in Figure 4-7.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.9766082763672, 378.66107177734375, 517.3616333007812, 401.477294921875], "page": 59, "span": [0, 91], "__ref_s3_data": null}]}, {"text": "Figure 4-7 Column definitions of the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.3719711303711, 169.74649047851562, 294.08258056640625, 179.3585662841797], "page": 59, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.48805236816406, 27.967016220092773, 523.5935668945312, 37.22077178955078], "page": 59, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "43", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6427001953125, 27.93828010559082, 547.2591552734375, 37.57048034667969], "page": 59, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "44", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.2916488647461, 27.93828010559082, 78.4020004272461, 37.80024719238281], "page": 60, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41600799560547, 27.63385009765625, 334.4485168457031, 37.373573303222656], "page": 60, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "4. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the CUSTOMERS table, as shown in Figure 4-8. There are no Foreign Key Constraints or Check Constraints on the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.0037384033203, 675.2788696289062, 538.2010498046875, 721.4434204101562], "page": 60, "span": [0, 263], "__ref_s3_data": null}]}, {"text": "Figure 4-8 Reviewing the constraints on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.43617248535156, 473.768798828125, 396.242431640625, 483.36297607421875], "page": 60, "span": [0, 59], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/39"}, {"text": "5. Review the definition of the ACCOUNTS table. The definition of the ACCOUNTS table is shown in Figure 4-9. 
RCAC has not been defined for this table yet.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3007049560547, 434.6003723144531, 542.1918334960938, 457.54901123046875], "page": 60, "span": [0, 154], "__ref_s3_data": null}]}, {"text": "Figure 4-9 ACCOUNTS table attributes", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3052520751953, 219.217041015625, 297.04034423828125, 228.72657775878906], "page": 60, "span": [0, 36], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/40"}, {"text": "6. Click the Columns tab to see the columns of the ACCOUNTS table, as shown in Figure 4-10.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.99215698242188, 698.8906860351562, 509.6353759765625, 721.5469970703125], "page": 61, "span": [0, 91], "__ref_s3_data": null}]}, {"text": "Figure 4-10 Column definitions of the ACCOUNTS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.3238754272461, 545.0313110351562, 291.8764343261719, 554.8672485351562], "page": 61, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "7. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the ACCOUNTS table, as shown in Figure 4-11. There is one Foreign Key Constraint and no Check Constraints on the ACCOUNTS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.97557067871094, 482.25897216796875, 538.2010498046875, 528.322021484375], "page": 61, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "Figure 4-11 Reviewing the constraints on the ACCOUNTS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.44501495361328, 294.8391418457031, 322.40972900390625, 304.5721740722656], "page": 61, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.4091796875, 27.91837501525879, 523.5935668945312, 37.27334213256836], "page": 61, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "45", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.630126953125, 27.93828010559082, 547.2591552734375, 37.63974380493164], "page": 61, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "8. Review the definition of the TRANSACTIONS table. The definition of the TRANSACTIONS table is shown in Figure 4-12. RCAC is not defined for this table yet.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2132110595703, 698.5640869140625, 547.2595825195312, 721.408447265625], "page": 62, "span": [0, 157], "__ref_s3_data": null}]}, {"text": "Figure 4-12 TRANSACTIONS table attributes", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.18008422851562, 483.1399230957031, 322.06451416015625, 492.7385559082031], "page": 62, "span": [0, 41], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/41"}, {"text": "9. Click the Columns tab to see the columns of the TRANSACTIONS table, as shown in Figure 4-13.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.16123962402344, 443.9065856933594, 531.8204345703125, 466.9815979003906], "page": 62, "span": [0, 95], "__ref_s3_data": null}]}, {"text": "Figure 4-13 Column definitions of the TRANSACTIONS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [135.9007568359375, 299.6871032714844, 383.970458984375, 309.5373229980469], "page": 62, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "10.Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the TRANSACTIONS table, as shown in Figure 4-14. 
There is one Foreign Key Constraint and one Check Constraint on the TRANSACTIONS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 237.45896911621094, 547.3941040039062, 283.4495849609375], "page": 62, "span": [0, 273], "__ref_s3_data": null}]}, {"text": "Figure 4-14 Reviewing the constraints on the TRANSACTIONS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.38268280029297, 57.628116607666016, 342.353271484375, 67.44571685791016], "page": 62, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "46", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.2685317993164, 27.93828010559082, 78.4020004272461, 37.63324737548828], "page": 62, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.30252075195312, 27.62788200378418, 334.4423828125, 37.370174407958984], "page": 62, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Now that you have reviewed the database model for this example, the following sections describe the steps that are required to implement RCAC in this banking scenario.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1913299560547, 699.0999755859375, 527.005615234375, 721.2046508789062], "page": 63, "span": [0, 167], "__ref_s3_data": null}]}, {"text": "4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [63.96824645996094, 650.2410278320312, 532.1195068359375, 679.559814453125], "page": 63, "span": [0, 74], "__ref_s3_data": null}]}, {"text": "The first step is to assign the appropriate function usage ID to the Database Engineers (DBEs) that will be implementing RCAC. For a description of function usage IDs, see 2.1, \"Roles\" on page 8. 
In this example, the DBEs are users MCAIN and HBEDOYA.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.59634399414062, 603.2190551757812, 531.9879150390625, 637.3970336914062], "page": 63, "span": [0, 250], "__ref_s3_data": null}]}, {"text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.24610900878906, 580.7687377929688, 266.8606872558594, 591.4186401367188], "page": 63, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "1. Right-click the database connection and select Application Administration , as shown in Figure 4-15.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80001831054688, 552.19091796875, 544.5436401367188, 574.6569213867188], "page": 63, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "Figure 4-15 Application administration", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0865936279297, 289.0970153808594, 292.8174133300781, 298.57464599609375], "page": 63, "span": [0, 38], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/42"}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.49533081054688, 27.961729049682617, 523.5935668945312, 37.22500228881836], "page": 63, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "47", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.54150390625, 27.93828010559082, 547.2591552734375, 37.57440948486328], "page": 63, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "48", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.28382873535156, 27.93828010559082, 78.4020004272461, 37.64601135253906], "page": 64, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.715648651123047, 334.4350891113281, 37.335201263427734], "page": 64, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. The Application Administration window opens, as shown in Figure 4-16. Click IBM i \uf0ae Database and select the function usage ID of Database Security Administrator .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.07907104492188, 698.89013671875, 530.2109985351562, 723.349853515625], "page": 64, "span": [0, 165], "__ref_s3_data": null}]}, {"text": "Figure 4-16 Application administration for IBM i", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.70782470703125, 390.7132568359375, 329.6557312011719, 400.10400390625], "page": 64, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/43"}, {"text": "3. 
Click Customize for the function usage ID of Database Security Administrator, as shown in Figure 4-17.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.16891479492188, 351.7818908691406, 544.5723266601562, 374.1484375], "page": 64, "span": [0, 105], "__ref_s3_data": null}]}, {"text": "Figure 4-17 Customizing the Database Security Administrator function usage ID", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0600128173828, 169.8534393310547, 459.35382080078125, 179.54444885253906], "page": 64, "span": [0, 77], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/44"}, {"text": "4. The Customize Access window opens, as shown in Figure 4-18. Click the users that need to implement RCAC. For this example, HBEDOYA and MCAIN are selected. Click Add and then click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.06375122070312, 687.2783203125, 547.1973876953125, 721.346923828125], "page": 65, "span": [0, 187], "__ref_s3_data": null}]}, {"text": "Figure 4-18 Customize Access window", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.98194885253906, 377.9023132324219, 297.5574035644531, 387.4375], "page": 65, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/45"}, {"text": "5. The Application Administrator window opens again. 
The function usage ID of Database Security Administrator now has an X in the Customized Access column, as shown in Figure 4-19.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.29513549804688, 326.8274230957031, 537.650146484375, 361.49859619140625], "page": 65, "span": [0, 180], "__ref_s3_data": null}]}, {"text": "Figure 4-19 Function usage ID Database Security Administrator customized", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.44737243652344, 196.30667114257812, 443.8832092285156, 206.25372314453125], "page": 65, "span": [0, 72], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/46"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.483154296875, 27.942846298217773, 523.5935668945312, 37.24003219604492], "page": 65, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "49", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6575317382812, 27.93828010559082, 547.2591552734375, 37.754249572753906], "page": 65, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "50", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.40753936767578, 27.93828010559082, 78.4020004272461, 37.70213317871094], "page": 66, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.658397674560547, 334.4214172363281, 37.3441162109375], "page": 66, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "6. Run an SQL query that shows which user profiles are enabled to define RCAC. 
The SQL query is shown in Figure 4-20.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.18582153320312, 698.6884765625, 545.5703735351562, 721.4199829101562], "page": 66, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "Figure 4-20 Query to display user profiles with function usage ID for RCAC", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.12620544433594, 507.701904296875, 438.67242431640625, 517.6649169921875], "page": 66, "span": [0, 74], "__ref_s3_data": null}]}, {"text": "4.3.3 Creating group profiles for the users and their roles", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.1628189086914, 474.81817626953125, 418.56524658203125, 488.2868957519531], "page": 66, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "The next step is to create the different group profiles (ADMIN, CUSTOMER, TELLER, and DBE) and assign the different user profiles to the different group profiles. For a description of the different groups and users for this example, see 4.2, \"Description of the users roles and responsibilities\" on page 39.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.925048828125, 415.64404296875, 547.2724609375, 461.91180419921875], "page": 66, "span": [0, 307], "__ref_s3_data": null}]}, {"text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.11614990234375, 393.6219787597656, 266.8606872558594, 404.0909118652344], "page": 66, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "1. 
On the main navigation pane of System i Navigator, right-click Groups and select New Group , as shown in Figure 4-21.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 364.826416015625, 535.997802734375, 387.3473205566406], "page": 66, "span": [0, 120], "__ref_s3_data": null}]}, {"text": "Figure 4-21 Creating group profiles", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0914306640625, 191.87730407714844, 281.48504638671875, 201.3507080078125], "page": 66, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/47"}, {"text": "2. The New Group window opens, as shown in Figure 4-22. For each new group, enter the Group name (ADMIN, CUSTOMER, TELLER, and DBE) and add the user profiles that are associated to this group by selecting the user profile and clicking Add .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.197265625, 685.2219848632812, 547.2084350585938, 721.3710327148438], "page": 67, "span": [0, 240], "__ref_s3_data": null}]}, {"text": "Figure 4-22 shows adding user TQSPENCER to the TELLER group profile.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.84339904785156, 670.1790771484375, 482.46234130859375, 680.8810424804688], "page": 67, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "Figure 4-22 Creating group profiles and adding users", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3377685546875, 418.9886779785156, 352.9278259277344, 428.8454895019531], "page": 67, "span": [0, 52], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/48"}, {"text": "3. 
After you create all the group profiles, you should see them listed in System i Navigator under Users and Groups \uf0ae Groups , as shown in Figure 4-23.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.06529235839844, 380.3785400390625, 537.6182861328125, 402.5232849121094], "page": 67, "span": [0, 151], "__ref_s3_data": null}]}, {"text": "Figure 4-23 Newly created group profiles", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.22373962402344, 229.8401641845703, 304.0131530761719, 239.18055725097656], "page": 67, "span": [0, 40], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/49"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.4501495361328, 27.94533920288086, 523.5935668945312, 37.25483703613281], "page": 67, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "51", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.784423828125, 27.93828010559082, 547.2591552734375, 37.451446533203125], "page": 67, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4.3.4 Creating the CUSTOMER_LOGIN_ID global variable", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.25196838378906, 708.2222290039062, 420.2837219238281, 721.6193237304688], "page": 68, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "In this step, you create a global variable that is used to capture the Customer_Login_ID information, which is required to validate the permissions. 
For more information about global variables, see 3.2.2, \"Built-in global variables\" on page 19.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94598388671875, 660.9389038085938, 545.7725219726562, 695.162353515625], "page": 68, "span": [0, 244], "__ref_s3_data": null}]}, {"text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.34487915039062, 638.8994750976562, 266.8606872558594, 649.5125122070312], "page": 68, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "1. From System i Navigator, under the schema Bank_Schema, right-click Global Variable and select New \uf0ae Global Variable , as shown in Figure 4-24.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 610.179931640625, 536.1627197265625, 632.689453125], "page": 68, "span": [0, 145], "__ref_s3_data": null}]}, {"text": "Figure 4-24 Creating a global variable", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.28884887695312, 375.1119689941406, 292.1772766113281, 384.5009765625], "page": 68, "span": [0, 38], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/50"}, {"text": "2. The New Global Variable window opens, as shown in Figure 4-25. Enter the global variable name of CUSTOMER_LOGIN_ID, select the data type of VARCHAR, and leave the default value of NULL. This default value ensures that users that do not use the web interface do not have permission to access the data. 
Click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.0622100830078, 312.438720703125, 541.1919555664062, 358.7922058105469], "page": 68, "span": [0, 314], "__ref_s3_data": null}]}, {"text": "Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.36365509033203, 70.82534790039062, 347.12200927734375, 81.17611694335938], "page": 68, "span": [0, 63], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/51"}, {"text": "52", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.50041961669922, 27.93828010559082, 78.4020004272461, 37.69797897338867], "page": 68, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.74216079711914, 334.4244079589844, 37.310428619384766], "page": 68, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "3. Now that the global variable is created, assign permissions to the variable so that it can be set by the program. Right-click the CUSTOMER_LOGIN_ID global variable and select Permissions , as shown in Figure 4-26.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.37158203125, 686.9842529296875, 547.2542724609375, 721.395263671875], "page": 69, "span": [0, 216], "__ref_s3_data": null}]}, {"text": "Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.04354858398438, 540.4573364257812, 457.02374267578125, 550.0607299804688], "page": 69, "span": [0, 72], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/52"}, {"text": "4. The Permissions window opens, as shown in Figure 4-27. 
Select Change authority for Webuser so that the application can set this global variable.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.89535522460938, 501.52752685546875, 534.23876953125, 524.0010986328125], "page": 69, "span": [0, 147], "__ref_s3_data": null}]}, {"text": "Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.090087890625, 203.91615295410156, 540.200439453125, 213.53598022460938], "page": 69, "span": [0, 91], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/53"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.47042846679688, 27.937862396240234, 523.5935668945312, 37.26201248168945], "page": 69, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "53", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.8599853515625, 27.93828010559082, 547.2591552734375, 37.51277542114258], "page": 69, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4.3.5 Defining and creating row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.25285339355469, 708.1456909179688, 339.9589538574219, 721.7003784179688], "page": 70, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "You now ready to define the row permissions of the tables. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81446838378906, 685.0341796875, 527.3794555664062, 695.3024291992188], "page": 70, "span": [0, 88], "__ref_s3_data": null}]}, {"text": "1. 
From the navigation pane of System i Navigator, click Schemas \uf0ae BANK_SCHEMA , right-click Row Permissions , and select New \uf0ae Row Permission , as shown in Figure 4-28.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 643.4611206054688, 530.7598266601562, 680.3302612304688], "page": 70, "span": [0, 169], "__ref_s3_data": null}]}, {"text": "Figure 4-28 Selecting new row permissions", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.02081298828125, 359.8767395019531, 313.7925720214844, 369.4545593261719], "page": 70, "span": [0, 41], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/54"}, {"text": "54", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.4588623046875, 27.93828010559082, 78.4020004272461, 37.646766662597656], "page": 70, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39488220214844, 27.697824478149414, 334.4214172363281, 37.306575775146484], "page": 70, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. The New Row Permission window opens, as shown in Figure 4-29. Enter the information regarding the row permissions on the CUSTOMERS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.299072265625, 686.8775024414062, 544.505126953125, 721.486083984375], "page": 71, "span": [0, 213], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.24830627441406, 658.2987060546875, 542.1815185546875, 680.8968505859375], "page": 71, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.38929748535156, 581.2600708007812, 537.7831420898438, 651.4755249023438], "page": 71, "span": [0, 438], "__ref_s3_data": null}]}, {"text": "-Any other user profile cannot see any rows at all.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.33180236816406, 564.0100708007812, 381.3265380859375, 574.4090576171875], "page": 71, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.9643096923828, 547.23583984375, 314.7688293457031, 557.5918579101562], "page": 71, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Figure 4-29 New row permissions on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.99803161621094, 252.98165893554688, 384.5369873046875, 262.90484619140625], "page": 71, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/55"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.46267700195312, 27.962182998657227, 523.5935668945312, 37.22908020019531], "page": 71, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "55", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7837524414062, 27.93828010559082, 547.2591552734375, 37.501976013183594], "page": 71, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "3. Define the row permissions for the ACCOUNTS table. The New Row Permission window opens, as shown in Figure 4-30. Enter the information regarding the row permissions on the ACCOUNTS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.47308349609375, 674.9223022460938, 543.8363647460938, 721.25634765625], "page": 72, "span": [0, 263], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to DBE, ADMIN and TELLER group profiles can see all the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.37701416015625, 646.2992553710938, 539.4539794921875, 668.8794555664062], "page": 72, "span": [0, 88], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.27256774902344, 569.2606201171875, 537.7576904296875, 639.7597045898438], "page": 72, "span": [0, 438], "__ref_s3_data": null}]}, {"text": "-Any other user profile cannot see any rows at all.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.14382934570312, 552.2808227539062, 381.32696533203125, 562.4356079101562], "page": 72, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.93441772460938, 535.1266479492188, 314.7692565917969, 545.3858032226562], "page": 72, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Figure 4-30 New row permissions on the ACCOUNTS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.46953582763672, 197.779296875, 305.92193603515625, 207.01873779296875], "page": 72, "span": [0, 53], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/56"}, {"text": "56", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.427490234375, 27.93828010559082, 78.4020004272461, 37.57078552246094], "page": 72, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.38003540039062, 27.699562072753906, 334.4214172363281, 37.30242156982422], "page": 72, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "4. Define the row permissions on the TRANSACTIONS table. The New Row Permission window opens, as shown in Figure 4-31. Enter the information regarding the row permissions on the TRANSACTIONS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.15975952148438, 674.8258666992188, 529.9049072265625, 721.1845703125], "page": 73, "span": [0, 270], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all of the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.35374450683594, 646.2988891601562, 547.229248046875, 668.8553466796875], "page": 73, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.26327514648438, 569.26025390625, 537.7831420898438, 639.7720336914062], "page": 73, "span": [0, 438], "__ref_s3_data": null}]}, {"text": "Note: You must join back to ACCOUNTS and then to CUSTOMERS by using a subquery to check whether the global variable matches CUSTOMER_LOGIN_ID. Also, if the row permission or column mask rule text references another table with RCAC defined, the RCAC for the referenced table is ignored.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [170.82107543945312, 505.298828125, 533.3919677734375, 551.484375], "page": 73, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "-Any other user profile cannot see any rows at all. Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.84132385253906, 459.28094482421875, 381.3265380859375, 485.8647155761719], "page": 73, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "Figure 4-31 New row permissions on the TRANSACTIONS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.39651489257812, 68.98163604736328, 325.65087890625, 78.23909759521484], "page": 73, "span": [0, 57], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/57"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.43179321289062, 28.01248550415039, 523.5935668945312, 37.145240783691406], "page": 73, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "57", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.70849609375, 27.93828010559082, 547.2591552734375, 37.42893600463867], "page": 73, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "58", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.43508911132812, 27.93828010559082, 78.4020004272461, 37.60900115966797], "page": 74, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.694499969482422, 334.4214172363281, 37.342376708984375], "page": 74, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "5. To verify that the row permissions are enabled, from System i Navigator, click Row Permissions , as shown in Figure 4-32. 
The three row permissions are created and enabled.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.46478271484375, 687.2786865234375, 521.1912231445312, 721.3973999023438], "page": 74, "span": [0, 175], "__ref_s3_data": null}]}, {"text": "Figure 4-32 List of row permissions on BANK_SCHEMA", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.27345275878906, 507.8507385253906, 293.2009582519531, 517.7713012695312], "page": 74, "span": [0, 50], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/58"}, {"text": "4.3.6 Defining and creating column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.18543243408203, 474.9947814941406, 327.4058837890625, 488.12005615234375], "page": 74, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "This section defines the masks on the columns. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.18695068359375, 451.67236328125, 479.4200134277344, 461.9885559082031], "page": 74, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "1. From the main navigation pane of System i Navigator, click Schemas \uf0ae BANK_SCHEMA , right-click Column Masks , and select New \uf0ae Column Mask , as shown in Figure 4-33.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.79998779296875, 411.03912353515625, 523.0706787109375, 447.3500061035156], "page": 74, "span": [0, 168], "__ref_s3_data": null}]}, {"text": "Figure 4-33 Creating a column mask", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.06741333007812, 210.88377380371094, 288.04827880859375, 220.59603881835938], "page": 74, "span": [0, 34], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/59"}, {"text": "2. 
In the New Column Mask window, which is shown in Figure 4-34, enter the following information:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.38919067382812, 699.2781372070312, 524.1021728515625, 721.3534545898438], "page": 75, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "-Select the CUSTOMERS table on which to create the column mask.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.30136108398438, 682.29833984375, 465.4696044921875, 692.8587646484375], "page": 75, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "-Select the Column to mask; in this example, it is CUSTOMER_EMAIL.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.126708984375, 670.2985229492188, 475.1905212402344, 680.7498168945312], "page": 75, "span": [0, 66], "__ref_s3_data": null}]}, {"text": "-Define the masking logic depending on the rules that you want to enforce. In this example, either the ADMIN or CUSTOMER group profiles can see the entire email address; otherwise, it is masked to ****@****.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.1351318359375, 634.299072265625, 531.3062133789062, 668.7997436523438], "page": 75, "span": [0, 207], "__ref_s3_data": null}]}, {"text": "Select the Enabled option. Click OK .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.8079833984375, 617.1079711914062, 314.766845703125, 627.5975952148438], "page": 75, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "Figure 4-34 Defining a column mask on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.46917724609375, 177.73121643066406, 395.4768981933594, 187.4214630126953], "page": 75, "span": [0, 57], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/60"}, {"text": "3. 
Repeat steps 1 on page 58 and 2 to create column masks for the following columns:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.0830841064453, 151.05543518066406, 522.032958984375, 161.2145538330078], "page": 75, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "-MASK_DRIVERS_LICENSE_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.3282928466797, 134.0785675048828, 381.9765319824219, 144.7090301513672], "page": 75, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "-MASK_LOGIN_ID_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.311767578125, 122.0787582397461, 335.7012023925781, 132.44134521484375], "page": 75, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "-MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.41925048828125, 109.89730834960938, 446.63970947265625, 120.5661392211914], "page": 75, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "-MASK_ACCOUNT_NUMBER_ON_ACCOUNTS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.36715698242188, 98.07913970947266, 379.42840576171875, 108.87069702148438], "page": 75, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "-MASK_SECURITY_QUESTION_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.2811737060547, 86.05474090576172, 397.17034912109375, 96.75386047363281], "page": 75, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "-MASK_TAX_ID_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.15771484375, 74.07952117919922, 322.7781066894531, 84.470703125], "page": 75, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.41281127929688, 27.987712860107422, 523.5935668945312, 37.20513916015625], "page": 75, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "59", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.8348999023438, 27.93828010559082, 547.2591552734375, 37.66927719116211], "page": 75, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4. To verify that the column masks are enabled, from System i Navigator, click Column Masks , as shown in Figure 4-35. The seven column masks are created and enabled.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.83897399902344, 698.9580688476562, 525.707275390625, 721.4640502929688], "page": 76, "span": [0, 166], "__ref_s3_data": null}]}, {"text": "Figure 4-35 List of column masks on BANK_SCHEMA", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.25138854980469, 599.3970336914062, 285.23284912109375, 608.34912109375], "page": 76, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/61"}, {"text": "4.3.7 Restricting the inserting and updating of masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.04530334472656, 566.3858032226562, 433.7906494140625, 579.4189453125], "page": 76, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "This step defines the check constraints that support the column masks to make sure that on INSERTS or UPDATES, data is not written with a masked value. 
For more information about the propagation of masked data, see 6.8, \"Avoiding propagation of masked data\" on page 108.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.80776977539062, 506.8577575683594, 544.9268798828125, 553.2254638671875], "page": 76, "span": [0, 270], "__ref_s3_data": null}]}, {"text": "Complete the following steps:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.28085327148438, 484.9208679199219, 266.8606872558594, 495.3861083984375], "page": 76, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "1. Create a check constraint on the column CUSTOMER_EMAIL in the CUSTOMERS table. From the navigation pane of System i Navigator, right-click the CUSTOMERS table and select Definition , as shown Figure 4-36", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.7621612548828, 444.3561096191406, 547.1956787109375, 478.94757080078125], "page": 76, "span": [0, 206], "__ref_s3_data": null}]}, {"text": "Figure 4-36 Definition of the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.76719665527344, 311.3090515136719, 334.2453308105469, 320.839111328125], "page": 76, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/62"}, {"text": "2. 
From the CUSTOMERS definition window, click the Check Constraints tab and click Add , as shown in Figure 4-37.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.84291076660156, 272.2760314941406, 547.7396240234375, 294.8913879394531], "page": 76, "span": [0, 113], "__ref_s3_data": null}]}, {"text": "Figure 4-37 Adding a check constraint", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.62763977050781, 179.6382293701172, 221.8394317626953, 189.23858642578125], "page": 76, "span": [0, 37], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/63"}, {"text": "60", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.37886810302734, 27.93828010559082, 78.4020004272461, 37.67101287841797], "page": 76, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39566802978516, 27.68936538696289, 334.4214172363281, 37.33694076538086], "page": 76, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "3. The New Check Constraint window opens, as shown in Figure 4-38. Complete the following steps:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3705596923828, 698.8788452148438, 515.8154907226562, 721.3765869140625], "page": 77, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "a. Select the CUSTOMER_EMAIL column.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.74546813964844, 682.29833984375, 344.06817626953125, 692.7590942382812], "page": 77, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "b. Enter the check constraint condition. 
In this example, specify CUSTOMER_EMAIL to be different from ****@****, which is the mask value.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.6258544921875, 653.2589721679688, 541.599853515625, 675.6708984375], "page": 77, "span": [0, 137], "__ref_s3_data": null}]}, {"text": "c. Select the On update violation, preserve column value option and click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.5522003173828, 636.2342529296875, 511.9270324707031, 646.7601928710938], "page": 77, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "Figure 4-38 Specifying a new check constraint on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.45979309082031, 129.19110107421875, 362.47125244140625, 138.60385131835938], "page": 77, "span": [0, 68], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/64"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.4756317138672, 27.958784103393555, 523.5935668945312, 37.23066711425781], "page": 77, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "61", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7523803710938, 27.93828010559082, 547.2591552734375, 37.49419403076172], "page": 77, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4. 
Figure 4-39 shows that there is now a check constraint on the CUSTOMERS table that prevents any masked data from being updated to the CUSTOMER_EMAIL column.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7649688720703, 698.927978515625, 535.5555419921875, 721.4752197265625], "page": 78, "span": [0, 159], "__ref_s3_data": null}]}, {"text": "Figure 4-39 Check constraint on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.41434478759766, 395.8761901855469, 294.2294006347656, 405.31463623046875], "page": 78, "span": [0, 51], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/65"}, {"text": "5. Create all the other check constraints that are associated to each of the masks on the CUSTOMERS table. After this is done, these constraints should look like the ones that are shown in Figure 4-40.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1330108642578, 344.68316650390625, 547.2733154296875, 379.44927978515625], "page": 78, "span": [0, 201], "__ref_s3_data": null}]}, {"text": "Figure 4-40 List of check constraints on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.48959350585938, 179.8015899658203, 323.02691650390625, 189.43350219726562], "page": 78, "span": [0, 60], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/66"}, {"text": "62", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.48886108398438, 27.93828010559082, 78.4020004272461, 37.6668586730957], "page": 78, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.68883514404297, 334.4214172363281, 37.32878112792969], "page": 78, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "4.3.8 Activating row and 
column access control", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.339111328125, 708.2562866210938, 360.40594482421875, 721.5789184570312], "page": 79, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "You are now ready to activate RCAC on all three tables in this example. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94613647460938, 673.1777954101562, 516.2232055664062, 695.4234008789062], "page": 79, "span": [0, 101], "__ref_s3_data": null}]}, {"text": "1. Start by enabling RCAC on the CUSTOMERS table. From System i Navigator, right-click the CUSTOMERS table and select Definition . As shown in Figure 4-41, make sure that you select Row access control and Column access control . Click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 631.5825805664062, 542.7099609375, 666.0606689453125], "page": 79, "span": [0, 239], "__ref_s3_data": null}]}, {"text": "Figure 4-41 Enabling RCAC on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.31451416015625, 447.11505126953125, 361.9471130371094, 456.8612365722656], "page": 79, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/67"}, {"text": "2. Enable RCAC on the ACCOUNTS table. Right-click the ACCOUNTS table and select Definition . As shown Figure 4-42, make sure that you select Row access control and Column access control . 
Click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.09698486328125, 396.5186462402344, 537.477783203125, 430.6976623535156], "page": 79, "span": [0, 198], "__ref_s3_data": null}]}, {"text": "Figure 4-42 Enabling RCAC on ACCOUNTS", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.55694580078125, 206.38462829589844, 318.4848937988281, 215.95945739746094], "page": 79, "span": [0, 37], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/68"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.43182373046875, 27.920791625976562, 523.5935668945312, 37.256046295166016], "page": 79, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "63", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7835693359375, 27.93828010559082, 547.2591552734375, 37.51277542114258], "page": 79, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "3. Enable RCAC on the TRANSACTIONS table. Right-click the TRANSACTIONS table and select Definition . As shown in Figure 4-43, make sure that you select Row access control . 
Click OK .", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.34092712402344, 687.2786865234375, 544.6798706054688, 721.5467529296875], "page": 80, "span": [0, 183], "__ref_s3_data": null}]}, {"text": "Figure 4-43 Enabling RCAC on TRANSACTIONS", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.18115234375, 502.9380187988281, 338.5848083496094, 512.6725463867188], "page": 80, "span": [0, 41], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/69"}, {"text": "4.3.9 Reviewing row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.24539184570312, 470.9652404785156, 271.11932373046875, 484.2687072753906], "page": 80, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "This section displays all the row permissions after enabling RCAC. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09507751464844, 435.4518737792969, 535.83544921875, 458.0428771972656], "page": 80, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "1. From System i Navigator, click Row Permissions , as shown in Figure 4-44. Three additional Row Permissions are added (QIBM_DEFAULT*). 
There is one per each row permission.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80099487304688, 394.49609375, 533.23095703125, 428.60400390625], "page": 80, "span": [0, 174], "__ref_s3_data": null}]}, {"text": "Figure 4-44 Row permissions after enabling RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.4681396484375, 198.6104736328125, 271.9492492675781, 208.03958129882812], "page": 80, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/70"}, {"text": "64", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.39739990234375, 27.93828010559082, 78.4020004272461, 37.68030548095703], "page": 80, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39335632324219, 27.674259185791016, 334.4214172363281, 37.33641052246094], "page": 80, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. Look at one of the row permission definitions by right-clicking it and selecting Definition , as shown in Figure 4-45.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2726593017578, 698.9834594726562, 544.3787231445312, 721.4147338867188], "page": 81, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "Figure 4-45 Selecting row permission definition", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.10995483398438, 541.09619140625, 328.74713134765625, 550.6611938476562], "page": 81, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/71"}, {"text": "3. A window opens, as shown in Figure 4-46. Take note of the nonsensical search condition (0=1) of the QIBM_DEFAULT row permission. 
This permission is ORed with all of the others and it ensures that if someone does not meet any of the criteria from the row permission then this condition is tested, and because it is false the access is denied.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.38052368164062, 478.2933044433594, 546.0803833007812, 524.6795043945312], "page": 81, "span": [0, 344], "__ref_s3_data": null}]}, {"text": "Figure 4-46 Search condition of the QIBM_DEFAULT row permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.46764373779297, 184.5873565673828, 343.1969299316406, 193.9542236328125], "page": 81, "span": [0, 63], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/72"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.4531707763672, 27.92449188232422, 523.5935668945312, 37.23784255981445], "page": 81, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "65", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7138671875, 27.93828010559082, 547.2591552734375, 37.5822639465332], "page": 81, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4.3.10 Demonstrating data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.32689666748047, 708.279541015625, 347.13360595703125, 721.6195068359375], "page": 82, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "You are now ready to test the RCAC definitions. 
Run the following SQL statements with each type of user (DBE, SECURITY, TELLER, ADMIN, and WEBUSER):", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.72265625, 672.728515625, 547.2556762695312, 695.5126953125], "page": 82, "span": [0, 148], "__ref_s3_data": null}]}, {"text": "GLYPH A SELECT statement that returns the SESSION_USER.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.75660705566406, 656.2430419921875, 390.8248596191406, 666.4733276367188], "page": 82, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "GLYPH A SELECT statement that counts the customers from the CUSTOMER table. There are 90 customers in the CUSTOMER table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.362548828125, 627.279296875, 543.1919555664062, 649.4446411132812], "page": 82, "span": [0, 131], "__ref_s3_data": null}]}, {"text": "GLYPH A simple SELECT statement that returns the following output from the CUSTOMERS table ordered by customer_name:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.49046325683594, 598.1434326171875, 543.6400756835938, 620.7059326171875], "page": 82, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "-c u s t o m e r _ i d", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.32005310058594, 581.2601318359375, 227.99673461914062, 590.979736328125], "page": 82, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "-customer_name", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.21688842773438, 569.2603149414062, 237.2466278076172, 578.581298828125], "page": 82, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "-customer_email", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.34410095214844, 557.260498046875, 236.2086181640625, 566.9081420898438], "page": 82, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "-c u s t o m e r _ t a x _ i d", 
"type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.4298553466797, 545.2606811523438, 246.89581298828125, 554.9027099609375], "page": 82, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "-customer_drivers_license_number", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.24029541015625, 533.2608642578125, 318.4402160644531, 543.2066040039062], "page": 82, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "Data access for a DBE user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.72146606445312, 505.5359802246094, 357.2575378417969, 517.6439819335938], "page": 82, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "To test a DBE (MCAIN) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.34576416015625, 492.2123107910156, 394.5498352050781, 502.6340026855469], "page": 82, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-47. In this example, MCAIN is the DBE user.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 462.9244689941406, 531.4830322265625, 485.1828308105469], "page": 82, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "Figure 4-47 DBE session user", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.22366333007812, 341.9969787597656, 263.2569580078125, 351.207763671875], "page": 82, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "2. 
The number of rows that the DBE user MCAIN can see is shown in Figure 4-48.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.00750732421875, 314.7911682128906, 503.1596374511719, 325.3352966308594], "page": 82, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "Figure 4-48 Number of rows that DBE user can see in the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.29940795898438, 155.87709045410156, 452.1958312988281, 165.4332275390625], "page": 82, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "66", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.38954162597656, 27.93828010559082, 78.4020004272461, 37.51285171508789], "page": 82, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.38802337646484, 27.724411010742188, 334.4214172363281, 37.31571578979492], "page": 82, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "3. The result of the third SQL statement is shown in Figure 4-49. Note the masked columns. 
User MCAIN can see all the rows in the CUSTOMERS table, but there are some columns where the result is masked.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.4130859375, 687.2783203125, 547.291015625, 721.2689819335938], "page": 83, "span": [0, 201], "__ref_s3_data": null}]}, {"text": "Figure 4-49 SQL statement that is run by the DBE user with masked columns", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.25863647460938, 312.47698974609375, 376.5732727050781, 321.8323059082031], "page": 83, "span": [0, 73], "__ref_s3_data": null}]}, {"text": "Data access for SECURITY user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.4450225830078, 283.8960876464844, 382.60321044921875, 296.25677490234375], "page": 83, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "To test a SECURITY user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2757110595703, 270.2940368652344, 382.8707580566406, 280.9367370605469], "page": 83, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-50. In this example, SECURITY is the security officer.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 241.54782104492188, 531.4830322265625, 263.9823303222656], "page": 83, "span": [0, 163], "__ref_s3_data": null}]}, {"text": "Figure 4-50 SECURITY session user", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.2332305908203, 95.13304138183594, 289.78204345703125, 104.47471618652344], "page": 83, "span": [0, 33], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/73"}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.4011688232422, 27.97011375427246, 523.5935668945312, 37.246681213378906], "page": 83, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "67", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7114868164062, 27.93828010559082, 547.2591552734375, 37.56051254272461], "page": 83, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "68", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.40093231201172, 27.93828010559082, 78.4020004272461, 37.56172180175781], "page": 84, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.33565521240234, 27.722749710083008, 334.4214172363281, 37.3211555480957], "page": 84, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. The number of rows in the CUSTOMERS table that the security officer can see is shown in Figure 4-51. The security officer cannot see any data at all.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1411895751953, 698.8203735351562, 547.1937866210938, 721.4033813476562], "page": 84, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "Figure 4-51 Number of rows that the security officer can see in the CUSTOMERS table", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.25540161132812, 554.5038452148438, 487.09649658203125, 563.9228515625], "page": 84, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "3. The result of the third SQL statement is shown in Figure 4-52. 
Note the empty set that is returned to the security officer.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.8675537109375, 515.6644287109375, 542.7387084960938, 538.0418090820312], "page": 84, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "Figure 4-52 SQL statement that is run by the SECURITY user - no results", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.28270721435547, 341.8423767089844, 362.2696838378906, 351.24163818359375], "page": 84, "span": [0, 71], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/74"}, {"text": "Data access for TELLER user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.57626342773438, 313.4159851074219, 368.6448059082031, 325.6100769042969], "page": 84, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "To test a Teller (TQSPENCER) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.22488403320312, 299.5154113769531, 427.7564697265625, 310.1946716308594], "page": 84, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "1. Confirm that the TELLER user is the user of the session by running the first SQL statement, as shown in Figure 4-53. In this example, TQSPENCER is a TELLER user.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.79998779296875, 271.1532897949219, 530.67822265625, 293.5227355957031], "page": 84, "span": [0, 164], "__ref_s3_data": null}]}, {"text": "Figure 4-53 TELLER session user", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.19703674316406, 110.5588150024414, 278.4291076660156, 119.93122863769531], "page": 84, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "2. The number of rows in the CUSTOMERS table that the TELLER user can see is shown in Figure 4-54. 
The TELLER user can see all the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.14108276367188, 699.0534057617188, 547.2401733398438, 721.4407958984375], "page": 85, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3040771484375, 561.177001953125, 482.8044738769531, 570.7412719726562], "page": 85, "span": [0, 78], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/75"}, {"text": "3. The result of the third SQL statement is shown in Figure 4-55. Note the masked columns. The TELLER user, TQSPENSER, can see all the rows, but there are some columns where the result is masked.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.166259765625, 510.33868408203125, 547.2914428710938, 544.6939086914062], "page": 85, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "Figure 4-55 SQL statement that is run by the TELLER user with masked columns", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.32777404785156, 110.41401672363281, 392.1126403808594, 119.69708251953125], "page": 85, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.44871520996094, 27.97879981994629, 523.5935668945312, 37.242225646972656], "page": 85, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "69", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7135620117188, 27.93828010559082, 547.2591552734375, 37.74858474731445], "page": 85, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "70", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.07074737548828, 27.93828010559082, 78.4020004272461, 37.85115432739258], "page": 86, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.40281677246094, 27.719274520874023, 334.4216003417969, 37.32561111450195], "page": 86, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Data access for ADMIN user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.5863037109375, 709.5360107421875, 361.28759765625, 721.4993286132812], "page": 86, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "To test an ADMIN (VGLUCCHESS) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.27212524414062, 696.2108154296875, 448.129638671875, 707.0377197265625], "page": 86, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "1. Confirm that the ADMIN user is the user of the session by running the first SQL statement, as shown in Figure 4-56. 
In this example, VGLUCCHESS is an ADMIN user.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 667.0796508789062, 547.238525390625, 689.3929443359375], "page": 86, "span": [0, 164], "__ref_s3_data": null}]}, {"text": "Figure 4-56 ADMIN session user", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.30274963378906, 557.8770141601562, 274.6385803222656, 567.1309814453125], "page": 86, "span": [0, 30], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/76"}, {"text": "2. The number of rows that the ADMIN user can see is shown in Figure 4-57. The ADMIN user can see all the rows.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.12391662597656, 519.0384521484375, 537.4520263671875, 541.3179931640625], "page": 86, "span": [0, 111], "__ref_s3_data": null}]}, {"text": "Figure 4-57 Number of rows that the ADMIN can see in the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.0926971435547, 411.29229736328125, 457.6079406738281, 420.69305419921875], "page": 86, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "3. The result of the third SQL statement is shown in Figure 4-58. 
There are no masked columns.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.28590393066406, 699.2781372070312, 524.1978759765625, 721.4224853515625], "page": 87, "span": [0, 94], "__ref_s3_data": null}]}, {"text": "Figure 4-58 SQL statement that is run by the ADMIN user - no masked columns", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.31376647949219, 303.7611083984375, 386.8026123046875, 313.2142028808594], "page": 87, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "Data access for WEBUSER user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.420166015625, 275.79608154296875, 383.07720947265625, 287.52459716796875], "page": 87, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "To test a CUSTOMERS (WEBUSER) user that accesses the database by using the web application, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1792449951172, 250.30612182617188, 527.5846557617188, 272.6474304199219], "page": 87, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-59. In this example, WEBUSER is a CUSTOMER user.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80001831054688, 220.9036102294922, 531.4830322265625, 243.4668731689453], "page": 87, "span": [0, 157], "__ref_s3_data": null}]}, {"text": "Figure 4-59 WEBUSER session user", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.7146453857422, 106.48837280273438, 289.7508239746094, 115.76629638671875], "page": 87, "span": [0, 32], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/77"}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.3849639892578, 27.97275733947754, 523.5935668945312, 37.253326416015625], "page": 87, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "71", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.3804931640625, 27.93828010559082, 547.2591552734375, 37.64752197265625], "page": 87, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "72", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.16098022460938, 27.93828010559082, 78.4020004272461, 37.712100982666016], "page": 88, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.71753692626953, 334.4482116699219, 37.33603286743164], "page": 88, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. A global variable (CUSTOMER_LOGIN_ID) is set by the web application and then is used to check the row permissions. Figure 4-60 shows setting the global variable by using the customer login ID.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.35842895507812, 686.8118286132812, 547.2925415039062, 721.3489379882812], "page": 88, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.25631713867188, 559.1651000976562, 394.8086242675781, 568.734130859375], "page": 88, "span": [0, 57], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/78"}, {"text": "3. 
Verify that the global variable was set with the correct value by clicking the Global Variable tab, as shown in Figure 4-61.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.11886596679688, 520.3414916992188, 514.379638671875, 542.4933471679688], "page": 88, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "Figure 4-61 Viewing the global variable value", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.23065185546875, 261.1766357421875, 320.9690246582031, 270.5703430175781], "page": 88, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/79"}, {"text": "4. The number of rows that the WEBUSER can see is shown in Figure 4-62. This user can see only the one row that belongs to his web-based user ID.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.91624450683594, 222.26995849609375, 541.2606811523438, 244.6071319580078], "page": 88, "span": [0, 145], "__ref_s3_data": null}]}, {"text": "Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.35362243652344, 96.42333221435547, 474.386962890625, 106.09137725830078], "page": 88, "span": [0, 74], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/80"}, {"text": "5. The result of the third SQL statement is shown in Figure 4-63. 
There are no masked columns, and the user can see only one row, which is the user's own row.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2490234375, 699.19580078125, 524.1978759765625, 721.304931640625], "page": 89, "span": [0, 158], "__ref_s3_data": null}]}, {"text": "Figure 4-63 SQL statement that is run by WEBUSER - no masked columns", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.12274932861328, 529.7809448242188, 368.16168212890625, 539.2711791992188], "page": 89, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "Other examples of data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.15013122558594, 501.2431640625, 377.4743957519531, 513.6253662109375], "page": 89, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "To run an SQL statement that lists all the accounts and current balance by customer, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9036865234375, 475.62322998046875, 512.9512939453125, 497.89947509765625], "page": 89, "span": [0, 114], "__ref_s3_data": null}]}, {"text": "1. Run the SQL statement that is shown in Figure 4-64 using the WEBUSER user profile. The SQL statement has no WHERE clause, but the WEBUSER can see only his accounts.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 435.0992736816406, 535.2608642578125, 469.3912353515625], "page": 89, "span": [0, 167], "__ref_s3_data": null}]}, {"text": "Figure 4-64 List of accounts and current balance by customer using the WEBUSER user profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.2671356201172, 238.4688262939453, 520.2247314453125, 247.89683532714844], "page": 89, "span": [0, 91], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.42453002929688, 27.92449188232422, 523.5935668945312, 37.203250885009766], "page": 89, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "73", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6150512695312, 27.93828010559082, 547.2591552734375, 37.539588928222656], "page": 89, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "74", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.05266571044922, 27.93828010559082, 78.4020004272461, 37.82094192504883], "page": 90, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41647338867188, 27.65258026123047, 334.4214172363281, 37.339962005615234], "page": 90, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. Figure 4-65 shows running a more complex SQL statement that calculates transaction total by account for year and quarter. Run this statement using the WEBUSER profile. The SQL statement has no WHERE clause, but the WEBUSER user can see only his transactions.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.34283447265625, 675.2788696289062, 547.2566528320312, 721.446044921875], "page": 90, "span": [0, 261], "__ref_s3_data": null}]}, {"text": "Figure 4-65 Calculate transaction total by account for year and quarter using the WEBUSER profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [136.00137329101562, 281.208740234375, 534.98046875, 290.7270812988281], "page": 90, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "3. Run the same SQL statement that lists the accounts and current balance by customer, but use a TELLER user profile. The result of this SQL statement is shown in Figure 4-66. 
The TELLER user can see all the rows in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.26443481445312, 687.2783203125, 547.2400512695312, 721.2547607421875], "page": 91, "span": [0, 236], "__ref_s3_data": null}]}, {"text": "Figure 4-66 List of accounts and current balance by customer using a TELLER user profile", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [135.93475341796875, 289.7537841796875, 501.49462890625, 299.6206970214844], "page": 91, "span": [0, 88], "__ref_s3_data": null}]}, {"text": "4.3.11 Query implementation with RCAC activated", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.13478088378906, 257.6565856933594, 375.0662841796875, 270.7911071777344], "page": 91, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "This section looks at some other interesting information that is related to RCAC by comparing the access plans of the same SQL statement without RCAC and with RCAC. This example uses Visual Explain and runs an SQL statement that lists the accounts and current balance by customer.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.62245178222656, 198.10879516601562, 547.1669311523438, 244.65867614746094], "page": 91, "span": [0, 280], "__ref_s3_data": null}]}, {"text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.41653442382812, 27.961427688598633, 523.5935668945312, 37.21397399902344], "page": 91, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "75", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5612182617188, 27.93828010559082, 547.2591552734375, 37.648582458496094], "page": 91, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "76", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.971797943115234, 27.93828010559082, 78.4020004272461, 37.69707107543945], "page": 92, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.41017150878906, 27.720333099365234, 334.4214172363281, 37.31072998046875], "page": 92, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Complete the following steps:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.24771118164062, 710.7789306640625, 266.8606872558594, 721.4140625], "page": 92, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "1. Figure 4-67 shows the SQL statement in Visual Explain ran with no RCAC. 
The implementation of the SQL statement is a two-way join, which is exactly what the SQL statement is doing.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 670.173583984375, 532.4749755859375, 704.4243774414062], "page": 92, "span": [0, 183], "__ref_s3_data": null}]}, {"text": "Figure 4-67 Visual Explain with no RCAC enabled", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.00106811523438, 281.8343505859375, 340.9333801269531, 291.2391052246094], "page": 92, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/81"}, {"text": "2. Figure 4-68 shows the Visual Explain of the same SQL statement, but with RCAC enabled. It is clear that the implementation of the SQL statement is more complex because the row permission rule becomes part of the WHERE clause.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2415771484375, 686.9530639648438, 514.048583984375, 721.3856811523438], "page": 93, "span": [0, 228], "__ref_s3_data": null}]}, {"text": "Figure 4-68 Visual Explain with RCAC enabled", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0782928466797, 302.49420166015625, 328.2711486816406, 312.02569580078125], "page": 93, "span": [0, 44], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/82"}, {"text": "3. Compare the advised indexes that are provided by the Optimizer without RCAC and with RCAC enabled. Figure 4-69 shows the index advice for the SQL statement without RCAC enabled. 
The index being advised is for the ORDER BY clause.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2212677001953, 251.74862670898438, 547.2394409179688, 286.0174865722656], "page": 93, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "Figure 4-69 Index advice with no RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.5755615234375, 115.8077621459961, 227.5344696044922, 125.41432189941406], "page": 93, "span": [0, 37], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/83"}, {"text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [214.50466918945312, 27.953571319580078, 523.5935668945312, 37.28648376464844], "page": 93, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "77", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4413452148438, 27.93828010559082, 547.36328125, 37.53264236450195], "page": 93, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4. Now, look at the advised indexes with RCAC enabled. As shown in Figure 4-70, there is an additional index being advised, which is basically for the row permission rule. 
For more information, see 6.4.2, \"Index advisor\" on page 99.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.06967163085938, 687.2786865234375, 547.188720703125, 721.38037109375], "page": 94, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "Figure 4-70 Index advice with RCAC enabled", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.43022918701172, 544.4075927734375, 250.1027374267578, 553.8165893554688], "page": 94, "span": [0, 42], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/84"}, {"text": "78", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.0842056274414, 27.93828010559082, 78.4020004272461, 37.74510955810547], "page": 94, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.6739559173584, 334.4214172363281, 37.38203048706055], "page": 94, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/85"}, {"text": "Chapter 5.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 95, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "5", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 95, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "RCAC and non-SQL interfaces", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 513.0821533203125, 511.1795959472656, 538.5230712890625], "page": 95, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "A benefit of Row and Column Access Control (RCAC) is that its security controls are enforced across all the interfaces that access DB2 for i because 
the security rules are defined and enforced at the database level. The examples that are shown in this paper focus on SQL-based access, but row permissions and column masks also are enforced for non-SQL interfaces, such as native record-level access in RPG and COBOL programs and CL commands, such as Display Physical File Member ( DSPPFM ) and Copy File ( CPYF ).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.72430419921875, 405.0442199707031, 547.181884765625, 475.4884948730469], "page": 95, "span": [0, 513], "__ref_s3_data": null}]}, {"text": "This consistent enforcement across all interfaces is a good thing, but there are some nuances and restrictions as a result of applying an SQL-based technology such as RCAC to non-SQL interfaces. These considerations are described in this chapter.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84396362304688, 359.08001708984375, 547.2554931640625, 393.4468994140625], "page": 95, "span": [0, 246], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.23419189453125, 337.0604553222656, 412.4370422363281, 347.3124084472656], "page": 95, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "GLYPH Unsupported interfaces", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.71270751953125, 319.21295166015625, 254.5669708251953, 329.8729553222656], "page": 95, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "GLYPH Native query result differences", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6899871826172, 308.0808410644531, 285.9798278808594, 317.739501953125], "page": 95, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "GLYPH Accidental updates with masked values", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55894470214844, 295.8805847167969, 
325.0879211425781, 305.8946838378906], "page": 95, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "GLYPH System CL commands considerations", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6660919189453, 283.8162536621094, 318.9596252441406, 293.9013366699219], "page": 95, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.00144958496094, 27.78173828125, 257.24334716796875, 37.335731506347656], "page": 95, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "79", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.47705078125, 27.93828010559082, 547.2591552734375, 37.86309051513672], "page": 95, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "5.1 Unsupported interfaces", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.36381530761719, 702.2925415039062, 275.70184326171875, 718.2304077148438], "page": 96, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "It is not possible to create a row permission or column mask on a distributed table or a program-described file.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0512237548828, 663.3173828125, 519.7969970703125, 685.9818725585938], "page": 96, "span": [0, 112], "__ref_s3_data": null}]}, {"text": "After a row permission or column mask is added to a table, there are some data access requests that no longer work. 
An attempt to open or query a table with activated RCAC controls involving any of the following scenarios is rejected with the CPD43A4 error message:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9476776123047, 617.174072265625, 547.2138061523438, 651.9033813476562], "page": 96, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "GLYPH A logical file with multiple formats if the open attempt requests more than one format.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5137176513672, 600.9810180664062, 526.0348510742188, 611.3893432617188], "page": 96, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "GLYPH A table or query that specifies an ICU 2.6.1 sort sequence.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56863403320312, 589.0631103515625, 410.4948425292969, 598.9420776367188], "page": 96, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "GLYPH A table with read triggers.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.54893493652344, 576.8346557617188, 264.3358459472656, 587.2266845703125], "page": 96, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "This unsupported interface error occurs when a table with RCAC controls is accessed, not when the RCAC control is created and activated.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.76893615722656, 543.160400390625, 537.0208129882812, 565.207763671875], "page": 96, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "For example, assume that there is a physical file, PF1, which is referenced by a single format logical file (LFS) and a multi-format logical file (LFM). A row permission is successfully created and activated for PF1. Any application that accesses PF1 directly or LFS continues to work without any issues. 
However, any application that opens LFM with multiple formats receives an error on the open attempt after the row permission is activated for PF1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05755615234375, 473.14154052734375, 547.275634765625, 531.4187622070312], "page": 96, "span": [0, 451], "__ref_s3_data": null}]}, {"text": "Important: This potential runtime error places a heavy emphasis on a comprehensive testing plan to ensure that all programs are tested. If testing uncovers an unsupported interface, then you must investigate whether the application can be rewritten to use a data access interface that is supported by RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.07794189453125, 408.80841064453125, 541.2369995117188, 455.4422912597656], "page": 96, "span": [0, 306], "__ref_s3_data": null}]}, {"text": "5.2 Native query result differences", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.42249298095703, 360.3178405761719, 329.61151123046875, 376.4193420410156], "page": 96, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "The SQL Query Engine (SQE) is the only engine that is enhanced by IBM to enforce RCAC controls on query requests. In order for native query requests to work with RCAC, these native query requests are now processed by SQE instead of the Classic Query Engine (CQE). 
Native query requests can consist of the following items:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82437133789062, 297.52056884765625, 542.3941650390625, 344.1916198730469], "page": 96, "span": [0, 321], "__ref_s3_data": null}]}, {"text": "GLYPH Query/400", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66761779785156, 281.1392517089844, 198.2183074951172, 291.8307189941406], "page": 96, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "GLYPH QQQQRY API", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.72911071777344, 269.1394348144531, 214.61248779296875, 280.1279602050781], "page": 96, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "GLYPH Open Query File ( OPNQRYF ) command", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.67552185058594, 257.1396179199219, 315.83990478515625, 267.3670959472656], "page": 96, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH Run Query ( RUNQRY ) command", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4887237548828, 244.85897827148438, 285.8927307128906, 255.41610717773438], "page": 96, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "GLYPH Native open (RPG, COBOL, OPNDBF, and so on) of an SQL view", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4298553466797, 232.98248291015625, 441.65673828125, 243.50750732421875], "page": 96, "span": [0, 74], "__ref_s3_data": null}]}, {"text": "Legacy queries that have been running without any issues for many years and over many IBM i releases are now processed by a different query engine. As a result, the runtime behavior and results that are returned can be different for native query requests with RCAC enabled. 
The OPNQRYF command and Query/400 run with SQE by default.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82940673828125, 175.08465576171875, 541.6351928710938, 221.27053833007812], "page": 96, "span": [0, 332], "__ref_s3_data": null}]}, {"text": "The following list documents some of the query output differences that can occur when native query requests are processed by CQE:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6905975341797, 140.7740020751953, 547.283447265625, 163.3935089111328], "page": 96, "span": [0, 129], "__ref_s3_data": null}]}, {"text": "GLYPH Different ordering in the result set", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69326782226562, 123.73572540283203, 299.5278015136719, 134.05801391601562], "page": 96, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "GLYPH Different values for null columns or columns with errors", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6090850830078, 111.89027404785156, 393.4990234375, 122.04586791992188], "page": 96, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "GLYPH Suppression of some mapping error messages", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.68191528320312, 99.16911315917969, 358.4886169433594, 109.94051361083984], "page": 96, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "GLYPH Loss of RRN positioning capabilities", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.53482055664062, 87.86771392822266, 310.5740661621094, 98.40489196777344], "page": 96, "span": [0, 52], "__ref_s3_data": null}]}, {"text": "GLYPH Duplicate key processing behavior differences", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56546020507812, 75.50570678710938, 354.23272705078125, 86.25474548339844], "page": 96, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "GLYPH Missing key 
feedback", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5389404296875, 63.51583480834961, 246.47032165527344, 73.92317962646484], "page": 96, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "80", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.39680480957031, 27.93828010559082, 78.4020004272461, 37.634681701660156], "page": 96, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.37086486816406, 27.725845336914062, 334.4214172363281, 37.295021057128906], "page": 96, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For a list of the differences and additional details, see the IBM i Memo to Users Version 7.2 , found at:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.34434509277344, 699.2781372070312, 543.0580444335938, 721.3868408203125], "page": 97, "span": [0, 105], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.02487182617188, 681.9579467773438, 521.9223022460938, 692.5218505859375], "page": 97, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "In addition, the performance of a native query with SQE can be different. 
It is possible that a new index or keyed logical file might need to be created to improve the performance.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06448364257812, 648.2789306640625, 544.6605834960938, 670.6199951171875], "page": 97, "span": [0, 180], "__ref_s3_data": null}]}, {"text": "Important: Based on the potential impacts of query result set and performance differences, you should perform extensive functional testing and performance benchmarking of applications and reports that use native query interfaces.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.0964813232422, 595.4119873046875, 495.4548034667969, 630.0718994140625], "page": 97, "span": [0, 229], "__ref_s3_data": null}]}, {"text": "5.3 Accidental updates with masked values", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.58708953857422, 547.8720092773438, 396.822265625, 563.6151123046875], "page": 97, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "The masked values that are returned by a column mask can potentially cause the original data value to be accidentally overwritten, especially with applications using native record-level access.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9681854248047, 497.2587890625, 547.184814453125, 531.249755859375], "page": 97, "span": [0, 193], "__ref_s3_data": null}]}, {"text": "For example, consider a table containing three columns of first name, last name, and tax ID that is read by an RPG program. 
The user running the program is not authorized to see the tax ID value, so a masked value (*****3333) is written into the program's record buffer, as shown Figure 5-1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0658416748047, 439.1994934082031, 541.6968994140625, 485.2427062988281], "page": 97, "span": [0, 291], "__ref_s3_data": null}]}, {"text": "In this example, the application reads the data for an update to correct the misspelling of the last name. The last name value is changed to Smith in the buffer. Now, a WRITE request is issued by the program, which uses the contents of the record buffer to update the row in the underlying DB2 table. Unfortunately, the record buffer still contains a masked value for the tax ID, so the tax ID value in the table is accidentally set to the masked value.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.98301696777344, 369.28070068359375, 547.1439819335938, 427.1898498535156], "page": 97, "span": [0, 453], "__ref_s3_data": null}]}, {"text": "Figure 5-1 Accidental update with masked values scenario", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.35667419433594, 60.42193603515625, 374.1333312988281, 69.67607116699219], "page": 97, "span": [0, 56], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/86"}, {"text": "Chapter 5. 
RCAC and non-SQL interfaces", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [353.7305603027344, 27.851152420043945, 523.6332397460938, 37.11669158935547], "page": 97, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "81", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.676513671875, 27.93828010559082, 547.2591552734375, 37.49736785888672], "page": 97, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "82", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.46670532226562, 27.93828010559082, 78.4020004272461, 37.6083984375], "page": 98, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.70258331298828, 334.4214172363281, 37.288448333740234], "page": 98, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Obviously, careful planning and testing should be exercised to avoid accidental updates with masked values.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01290893554688, 699.2785034179688, 545.1429443359375, 721.2443237304688], "page": 98, "span": [0, 107], "__ref_s3_data": null}]}, {"text": "DB2 for i also enhanced its check constraint support in the IBM i 7.2 release with a new ON UPDATE clause that allows the existing value to be preserved when a masked value is detected by a check constraint. 
Details about how to employ this new check constraint support can be found in 6.8.1, \"Check constraint solution\" on page 108.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.98336791992188, 641.1885986328125, 547.2675170898438, 687.29736328125], "page": 98, "span": [0, 333], "__ref_s3_data": null}]}, {"text": "5.4 System CL commands considerations", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.53089904785156, 597.7332763671875, 385.5848388671875, 613.8056030273438], "page": 98, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "As stated earlier, RCAC controls are enforced on all data access interfaces. This enforcement is not limited to programmatic interfaces; it also includes system CL commands that read and insert data, such as the Create Duplicate Object ( CRTDUPOBJ ) and Start DFU ( STRDFU ) CL commands. This section documents the behavior of the Create Duplicate Object ( CRTDUPOBJ ), Copy File ( CPYF ), and Copy Library ( CPYLIB ) CL commands with RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91700744628906, 522.7411499023438, 547.4896240234375, 581.418212890625], "page": 98, "span": [0, 440], "__ref_s3_data": null}]}, {"text": "5.4.1 Create Duplicate Object (CRTDUPOBJ) command", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.38429260253906, 490.38629150390625, 405.0467224121094, 503.6981506347656], "page": 98, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "The CRTDUPOBJ command is enhanced with a new Access Control ( ACCCTL ) parameter in the IBM i 7.2 release to copy RCAC controls to the new object being created. 
Row permissions and column masks are copied to the new object by default because the default value for the ACCCTL parameter is *ALL .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77301025390625, 431.25885009765625, 542.9708251953125, 477.3899230957031], "page": 98, "span": [0, 294], "__ref_s3_data": null}]}, {"text": "If the invoker of the CRTDUPOBJ command asks for data to be copied with a value of *YES for the DATA parameter, the value of the ACCCTL parameter must be *ALL . If not, the command invocation receives an error.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.04022216796875, 385.2994079589844, 538.5584716796875, 419.4244689941406], "page": 98, "span": [0, 210], "__ref_s3_data": null}]}, {"text": "When data is copied to the duplicated object with the DATA parameter, all rows and unmasked column values are copied into the new object, even if the command invoker is not authorized to view all rows or certain column values. This behavior occurs because the RCAC controls also are copied to the new object. The copied RCAC controls enforce that only authorized users are allowed to view row and column values in the newly duplicated object.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.79270935058594, 315.28057861328125, 547.2168579101562, 373.29388427734375], "page": 98, "span": [0, 442], "__ref_s3_data": null}]}, {"text": "5.4.2 Copy File (CPYF) command", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.39613342285156, 282.42333984375, 270.95599365234375, 295.696533203125], "page": 98, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "The CPYF command copies only data, so there is no new parameter to copy RCAC controls to the target table. 
Therefore, if CPYF is used to create a target table, there are no RCAC controls placed on the target table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81484985351562, 234.62559509277344, 547.2855224609375, 269.47161865234375], "page": 98, "span": [0, 214], "__ref_s3_data": null}]}, {"text": "When RCAC controls are in place on the source table, the CPYF command is limited to reading rows and column values that are based on the invoker of the CPYF command. If a user is authorized to see all rows and column values, then all rows and unmasked column values are copied to the target table (assuming no RCAC controls are on the target table). If a user without full access runs the CPYF command, the CPYF command can copy only a subset of the rows into the target table. In addition, if that user can view only masked column values, then masked values are copied into the target table. This also applies to the Copy to Import File ( CPYTOIMPF ) command.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9669189453125, 129.28024291992188, 547.3273315429688, 223.54742431640625], "page": 98, "span": [0, 660], "__ref_s3_data": null}]}, {"text": "If the target table has RCAC controls defined and activated, then the CPYF command is allowed only to add or replace rows in the target table based on the RCAC controls. 
If CPYF tries to add a row to the target table that the command invoker is not allowed to view according to the target RCAC controls, then an error is received.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0917205810547, 71.02362060546875, 535.9159545898438, 117.43394470214844], "page": 98, "span": [0, 330], "__ref_s3_data": null}]}, {"text": "5.4.3 Copy Library (CPYLIB) command", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.36076354980469, 708.317626953125, 305.67718505859375, 721.67529296875], "page": 99, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "The CPYLIB command is enhanced with the same Access Control ( ACCCTL ) parameter as the CRTDUPOBJ command in the IBM i 7.2 release (see 5.4.1, \"Create Duplicate Object (CRTDUPOBJ) command\" on page 82). Row permissions and column masks are copied to the new object in the new library by default because the default value for the ACCCTL parameter is *ALL .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.80535888671875, 637.099365234375, 544.9737548828125, 695.4727783203125], "page": 99, "span": [0, 354], "__ref_s3_data": null}]}, {"text": "Chapter 5. 
RCAC and non-SQL interfaces", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [353.8760070800781, 27.865577697753906, 523.6332397460938, 37.186405181884766], "page": 99, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "83", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7760009765625, 27.93828010559082, 547.2591552734375, 37.517459869384766], "page": 99, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "84", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.56328582763672, 27.93828010559082, 78.4020004272461, 37.691104888916016], "page": 100, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.69057273864746, 334.5158386230469, 37.364131927490234], "page": 100, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/87"}, {"text": "Chapter 6.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 101, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "Additional considerations", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.1068572998047, 513.0821533203125, 455.59796142578125, 538.5267944335938], "page": 101, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "This chapter covers additional considerations that must be taken into account when implementing Row and Column Access Control (RCAC), including the following functions:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85086059570312, 452.9068603515625, 531.345458984375, 475.37335205078125], "page": 101, "span": [0, 168], "__ref_s3_data": null}]}, {"text": "GLYPH Timing of column masking", "type": "paragraph", "name": "List-item", "font": null, 
"prov": [{"bbox": [135.82879638671875, 435.5367736816406, 267.31884765625, 445.6518859863281], "page": 101, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "GLYPH Data movement", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65838623046875, 424.05908203125, 221.50328063964844, 433.73193359375], "page": 101, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "GLYPH Joins", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.8079376220703, 412.05926513671875, 174.7480926513672, 421.72412109375], "page": 101, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "GLYPH Views", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.80653381347656, 400.0594482421875, 177.69752502441406, 409.92095947265625], "page": 101, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "GLYPH Materialized query tables", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.71363830566406, 387.4857177734375, 262.4305114746094, 398.0342102050781], "page": 101, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "GLYPH Index advisor", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69224548339844, 376.059814453125, 210.2908477783203, 386.0014953613281], "page": 101, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "GLYPH Monitoring, analysis, and debugging", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.62872314453125, 363.18524169921875, 310.97344970703125, 373.8130798339844], "page": 101, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH Performance and scalability", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.52769470214844, 351.5644226074219, 273.3426513671875, 362.0223693847656], "page": 101, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": 
[136.10952758789062, 329.46014404296875, 347.4121398925781, 340.1173400878906], "page": 101, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Timing of column masking", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.86045837402344, 312.50946044921875, 267.31884765625, 323.55072021484375], "page": 101, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC effects on data movement", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.68113708496094, 301.06097412109375, 296.47052001953125, 311.4767150878906], "page": 101, "span": [0, 45], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC effects on joins", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7341766357422, 289.0611572265625, 248.6178741455078, 299.3589172363281], "page": 101, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "GLYPH Monitoring, analyzing, and debugging with RCAC", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7424774169922, 276.7906188964844, 368.6199951171875, 287.5904235839844], "page": 101, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "GLYPH Views, materialized query tables, and query rewrite with RCAC", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.42808532714844, 264.9301452636719, 428.5085754394531, 275.88232421875], "page": 101, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC effects on performance and scalability", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.52320861816406, 252.71844482421875, 349.4490051269531, 263.50238037109375], "page": 101, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "GLYPH Exclusive lock to implement RCAC (availability issues)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65530395507812, 240.60609436035156, 390.4403381347656, 251.41629028320312], "page": 101, "span": [0, 70], 
"__ref_s3_data": null}]}, {"text": "GLYPH Avoiding propagation of masked data", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.69041442871094, 228.79293823242188, 315.4986267089844, 239.35397338867188], "page": 101, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "GLYPH Triggers and functions (SECURED)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.63272094726562, 217.0623016357422, 307.3042297363281, 227.76193237304688], "page": 101, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC is only one part of the solution", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.63467407226562, 204.85337829589844, 315.1477355957031, 215.71888732910156], "page": 101, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.9858283996582, 27.747447967529297, 257.24334716796875, 37.346683502197266], "page": 101, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "85", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7403564453125, 27.93828010559082, 547.2591552734375, 37.66413879394531], "page": 101, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "6", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 101, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "6.1 Timing of column masking", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.65674591064453, 702.2841796875, 298.45440673828125, 718.27685546875], "page": 102, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "An important design and implementation consideration is the fact that RCAC column masking occurs after all of the query processing is complete, which means that the query results are not at all 
based on the masked values. Any local selection, joining, grouping, or ordering operations are based on the unmasked column values. Only the final result set is the target of the masking.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.86968994140625, 627.637451171875, 547.2496337890625, 685.9827270507812], "page": 102, "span": [0, 381], "__ref_s3_data": null}]}, {"text": "An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9619598388672, 593.6736450195312, 547.2325439453125, 615.8494873046875], "page": 102, "span": [0, 140], "__ref_s3_data": null}]}, {"text": "SELECT", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [222.84666442871094, 559.6641845703125, 250.23239135742188, 568.2152099609375], "page": 102, "span": [0, 6], "__ref_s3_data": null}]}, {"text": "FROM GROUP BY ORDER BY", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [222.54440307617188, 515.1958618164062, 263.9931640625, 545.9962768554688], "page": 102, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "Without RCAC Masking", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [160.45700073242188, 481.6206970214844, 285.03265380859375, 494.0350036621094], "page": 102, "span": [0, 20], "__ref_s3_data": null}]}, {"text": "With RCAC Masking", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [341.0262451171875, 481.6206970214844, 447.7765808105469, 493.9162292480469], "page": 102, "span": [0, 17], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/14"}, {"text": "Figure 6-1 Timing of column masking", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.15956115722656, 310.1897888183594, 289.9644775390625, 319.9989013671875], "page": 102, "span": [0, 
35], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/15"}, {"text": "CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [289.7908630371094, 537.4301147460938, 389.4005432128906, 568.0421142578125], "page": 102, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "CREDIT_CARD_NUMBER", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [290.2843017578125, 526.31298828125, 383.71990966796875, 534.2352905273438], "page": 102, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "CREDIT_CARD_NUMBER;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [289.9924621582031, 514.7672729492188, 386.2455749511719, 524.0040283203125], "page": 102, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "86", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.49551391601562, 27.93828010559082, 78.4020004272461, 37.467079162597656], "page": 102, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.642459869384766, 334.4214172363281, 37.34343719482422], "page": 102, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. 
This difference in behavior is shown in Figure 6-2.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.79122924804688, 626.829345703125, 547.1474609375, 721.4331665039062], "page": 103, "span": [0, 652], "__ref_s3_data": null}]}, {"text": "Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.33775329589844, 563.2588500976562, 540.7468872070312, 609.4223022460938], "page": 103, "span": [0, 262], "__ref_s3_data": null}]}, {"text": "Figure 6-2 Masking differences between Fieldproc and RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1103515625, 250.29544067382812, 386.60394287109375, 260.0503845214844], "page": 103, "span": [0, 57], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/88"}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.2842712402344, 27.846771240234375, 523.6287841796875, 37.037837982177734], "page": 103, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "87", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7338256835938, 27.93828010559082, 547.2591552734375, 37.56776428222656], "page": 103, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "6.2 RCAC effects on data movement", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.67425537109375, 706.0162963867188, 342.9832458496094, 721.5018310546875], "page": 104, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "As described earlier and shown in Figure 6-3, RCAC is applied pervasively regardless of the data access programming interface, SQL statement, or IBM i command. 
The effects of RCAC on data movement scenarios can be profound and possibly problematic. It is important to understand these effects and make the appropriate adjustments to avoid incorrect results or data loss.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.64599609375, 631.2990112304688, 547.2276000976562, 689.4252319335938], "page": 104, "span": [0, 370], "__ref_s3_data": null}]}, {"text": "Figure 6-3 RCAC and data movement", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.05140686035156, 491.0803527832031, 292.9797668457031, 500.5362854003906], "page": 104, "span": [0, 33], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/89"}, {"text": "The \"user\" that is running the data movement application or process, whether it be a high availability (HA) scenario, an extract, transform, load (ETL) scenario, or just copying data from one file or table to another one, must have permission to all the source rows without masking, and not be restricted from putting rows into the target. Allowing the data movement application or process to bypass the RCAC rules must be based on a clear and concise understanding of the organization's object security and data access policy. Proper design, implementation, and testing are critical success factors when applying RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8123321533203, 392.3400573730469, 547.2745361328125, 474.5461730957031], "page": 104, "span": [0, 619], "__ref_s3_data": null}]}, {"text": "Important: RCAC is applied to the table or physical file access. It is not applied to the journal receiver access. Any and all database transactions are represented in the journal regardless of RCAC row permissions and column masks. 
This makes it essential that IBM i security is used to ensure that only authorized personnel have access to the journaled data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [141.9169158935547, 316.03607177734375, 536.527587890625, 374.77313232421875], "page": 104, "span": [0, 360], "__ref_s3_data": null}]}, {"text": "This section covers in detail the following three examples:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.920166015625, 287.2419738769531, 390.6604919433594, 297.3553771972656], "page": 104, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "GLYPH Effects when RCAC is defined on the source table", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.67665100097656, 270.45928955078125, 372.0890197753906, 280.7431335449219], "page": 104, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "GLYPH Effects when RCAC is defined on the target table", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6602020263672, 258.45947265625, 367.72723388671875, 268.80596923828125], "page": 104, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "GLYPH Effects when RCAC is defined on both source and target tables", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55271911621094, 246.36570739746094, 430.467529296875, 256.64398193359375], "page": 104, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "6.2.1 Effects when RCAC is defined on the source table", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.39418029785156, 213.8573760986328, 407.9704895019531, 226.74691772460938], "page": 104, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Example 6-1 shows a simple example that illustrates the effect of RCAC as defined on the source table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.16326904296875, 178.47857666015625, 536.1681518554688, 
200.64059448242188], "page": 104, "span": [0, 102], "__ref_s3_data": null}]}, {"text": "Example 6-1 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.6311798095703, 157.3170928955078, 331.9786682128906, 166.7194366455078], "page": 104, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.79693603515625, 139.6705780029297, 346.6770935058594, 149.1321258544922], "page": 104, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "88", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.45632934570312, 27.93828010559082, 78.4020004272461, 37.49223327636719], "page": 104, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.3826904296875, 27.736873626708984, 334.4214172363281, 37.29977798461914], "page": 104, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For example, given a \"source\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the RCAC rules applied. This reduced and modified result set is inserted into the \"target\" table even though the query is defined as returning all rows and all columns. 
Instead of seven rows that are selected from the source, only three rows are returned and placed into the target, as shown in Figure 6-4.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0976104736328, 650.8802490234375, 547.2900390625, 721.2063598632812], "page": 105, "span": [0, 516], "__ref_s3_data": null}]}, {"text": "Figure 6-4 RCAC effects on data movement from SOURCE", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.33627319335938, 375.21319580078125, 377.8245544433594, 384.5627136230469], "page": 105, "span": [0, 52], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/90"}, {"text": "6.2.2 Effects when RCAC is defined on the target table", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3865737915039, 343.17596435546875, 401.6576843261719, 356.1142578125], "page": 105, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Example 6-2 shows a simple example that illustrates the effect of RCAC as defined on the target table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.10086059570312, 307.6630859375, 536.1681518554688, 329.97418212890625], "page": 105, "span": [0, 102], "__ref_s3_data": null}]}, {"text": "Example 6-2 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 286.7970886230469, 331.8053894042969, 295.9055480957031], "page": 105, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.8000030517578, 268.98992919921875, 346.6770935058594, 278.5483703613281], "page": 105, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.2767333984375, 27.871395111083984, 523.6287841796875, 37.01155090332031], "page": 105, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "89", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.7984619140625, 27.93828010559082, 547.2591552734375, 37.58755111694336], "page": 105, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "90", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.30988311767578, 27.93828010559082, 78.4020004272461, 37.60688781738281], "page": 106, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.724334716796875, 334.4214172363281, 37.2808952331543], "page": 106, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Given a \"target\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that represents all the rows and columns. The seven row result set is inserted into the \"target\", and the RCAC row permission causes an error to be returned, as shown in Figure 6-5. The source rows where NAME = 'CAIN' do not satisfy the target table's permission, and therefore cannot be inserted. 
In other words, you are inserting data that you cannot read.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.002685546875, 651.1795654296875, 547.2645874023438, 721.380859375], "page": 106, "span": [0, 546], "__ref_s3_data": null}]}, {"text": "Figure 6-5 RCAC effects on data movement on TARGET", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1909637451172, 368.4858703613281, 367.20880126953125, 377.8680419921875], "page": 106, "span": [0, 50], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/91"}, {"text": "6.2.3 Effects when RCAC is defined on both source and target tables", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.30170440673828, 336.48931884765625, 490.12786865234375, 349.3372497558594], "page": 106, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "Example 6-3 shows a simple example that illustrates the effect of RCAC as defined on both the source and the target tables.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.27003479003906, 301.178466796875, 541.6332397460938, 323.1507568359375], "page": 106, "span": [0, 123], "__ref_s3_data": null}]}, {"text": "Example 6-3 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.31845092773438, 280.07708740234375, 332.0157775878906, 289.33172607421875], "page": 106, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.64996337890625, 262.2247314453125, 346.6770935058594, 271.6866760253906], "page": 106, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "Given a \"source\" table and a \"target\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the RCAC rules 
applied. This reduced and modified result set is inserted into the \"target\" table even though the query is defined as returning all rows and all columns. Instead of seven rows that are selected from the source, only three rows are returned.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06167602539062, 173.19976806640625, 547.2467041015625, 243.45176696777344], "page": 106, "span": [0, 473], "__ref_s3_data": null}]}, {"text": "Although the source rows where NAME <> 'CAIN' do satisfy the target table's permission, the AMOUNT column value of 999.99 represents masked data and therefore cannot be inserted. An error is returned indicating the failure, as shown in Figure 6-6. In this scenario, DB2 is protecting against an overt attempt to insert masked data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.64163208007812, 674.9846801757812, 547.2501831054688, 721.2555541992188], "page": 107, "span": [0, 331], "__ref_s3_data": null}]}, {"text": "Figure 6-6 RCAC effects on data movement on SOURCE and TARGET", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.189453125, 395.10968017578125, 425.718017578125, 404.72088623046875], "page": 107, "span": [0, 61], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/92"}, {"text": "6.3 RCAC effects on joins", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.65412139892578, 351.7353515625, 263.02801513671875, 367.916748046875], "page": 107, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "As mentioned previously, a fundamental concept of row permission is that it defines a logical subset of rows that a user or group of users is permitted to access and use. 
This subset becomes the new basis of any query against the table that has RCAC enabled.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89974975585938, 301.1240539550781, 546.7406005859375, 335.3844909667969], "page": 107, "span": [0, 258], "__ref_s3_data": null}]}, {"text": "Note: Thinking of the row permission as defining a virtual set of rows that can be operated on is the secret to understanding the effect of RCAC on any join operation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.38418579101562, 260.81829833984375, 541.3812255859375, 283.3266296386719], "page": 107, "span": [0, 167], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.33331298828125, 27.853492736816406, 523.6287841796875, 37.027339935302734], "page": 107, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "91", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.72705078125, 27.93828010559082, 547.2591552734375, 37.5089225769043], "page": 107, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "92", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.33194732666016, 27.93828010559082, 78.4020004272461, 37.64910888671875], "page": 108, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.734079360961914, 334.4214172363281, 37.32621383666992], "page": 108, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "As shown in Figure 6-7, there are two different sets, set A and set B. 
However, set B has a row permission that subsets the rows that a user can see.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.87557983398438, 699.1170654296875, 537.429931640625, 721.29052734375], "page": 108, "span": [0, 149], "__ref_s3_data": null}]}, {"text": "Figure 6-7 Set A and set B with row permissions", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1213836669922, 463.05242919921875, 334.17889404296875, 472.4340515136719], "page": 108, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/93"}, {"text": "6.3.1 Inner joins", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3143310546875, 430.5066833496094, 166.59303283691406, 443.1670837402344], "page": 108, "span": [0, 17], "__ref_s3_data": null}]}, {"text": "Inner join defines the intersection of two data sets. For a row to be returned from the inner join query, it must appear in both sets, as shown in Figure 6-8.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0799102783203, 395.0136413574219, 547.21875, 417.2611999511719], "page": 108, "span": [0, 158], "__ref_s3_data": null}]}, {"text": "Figure 6-8 Inner join without RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0496826171875, 157.818115234375, 327.55682373046875, 167.18350219726562], "page": 108, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/94"}, {"text": "Given that row permission serves to eliminate logically rows from one or more sets, the result set from an inner join (and a subquery) can be different when RCAC is applied. 
RCAC can reduce the number of rows that are permitted to be accessed by the join, as shown in Figure 6-9.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.28961181640625, 675.0590209960938, 547.3219604492188, 721.3756103515625], "page": 109, "span": [0, 279], "__ref_s3_data": null}]}, {"text": "Effect of column masks on inner joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [141.91893005371094, 622.695068359375, 537.6323852539062, 657.2465209960938], "page": 109, "span": [0, 207], "__ref_s3_data": null}]}, {"text": "Figure 6-9 Inner join with RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.10440063476562, 359.0824890136719, 314.9508972167969, 368.5648498535156], "page": 109, "span": [0, 42], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/95"}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.2262878417969, 27.840274810791016, 523.6287841796875, 37.05694580078125], "page": 109, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "93", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6971435546875, 27.93828010559082, 547.2591552734375, 37.44646072387695], "page": 109, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "6.3.2 Outer joins", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3655014038086, 708.4302978515625, 169.4800567626953, 721.3550415039062], "page": 110, "span": [0, 17], "__ref_s3_data": null}]}, {"text": "Outer joins preserve one or both sides of two data sets. 
A row can be returned from the outer join query if it appears in the primary set (LEFT, RIGHT, or both in the case of FULL), as shown in Figure 6-10. Column values from the secondary set are returned if the row has a match in the primary set. Otherwise, NULL is returned for the column value by default.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.71127319335938, 649.0245971679688, 547.2286376953125, 695.3336791992188], "page": 110, "span": [0, 360], "__ref_s3_data": null}]}, {"text": "Figure 6-10 Outer join without RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.24244689941406, 407.2479553222656, 334.27374267578125, 416.5892333984375], "page": 110, "span": [0, 46], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/96"}, {"text": "94", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.35562133789062, 27.93828010559082, 78.4020004272461, 37.65726852416992], "page": 110, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.70145034790039, 334.4722595214844, 37.30113983154297], "page": 110, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Given that row permission serves to eliminate logically rows from one or more sets, more column values that are returned from the secondary table in outer join can be NULL when RCAC is applied, as shown in Figure 6-11.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.098388671875, 686.85498046875, 535.2982177734375, 721.3606567382812], "page": 111, "span": [0, 218], "__ref_s3_data": null}]}, {"text": "Effect of column masks on inner joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", 
"type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [141.93408203125, 634.7100219726562, 537.6323852539062, 669.4381713867188], "page": 111, "span": [0, 207], "__ref_s3_data": null}]}, {"text": "Figure 6-11 Outer join with RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.07041931152344, 357.96063232421875, 321.8915710449219, 367.32086181640625], "page": 111, "span": [0, 43], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/97"}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.27435302734375, 27.857723236083984, 523.6287841796875, 37.033607482910156], "page": 111, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "95", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.671142578125, 27.93828010559082, 547.2591552734375, 37.526145935058594], "page": 111, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "6.3.3 Exception joins", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3558349609375, 708.388671875, 196.83258056640625, 721.2674560546875], "page": 112, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "Exception joins preserve one side of two data sets. A row can be returned from the exception join query if it appears in the primary set (LEFT or RIGHT) and the row does not appear in the secondary set, as shown in Figure 6-12. 
Column values from the secondary set are returned as NULL by default.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.830810546875, 648.7778930664062, 547.2914428710938, 695.349609375], "page": 112, "span": [0, 297], "__ref_s3_data": null}]}, {"text": "Figure 6-12 Exception join without RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.1206512451172, 385.1718444824219, 351.78106689453125, 394.6317138671875], "page": 112, "span": [0, 50], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/98"}, {"text": "Given that row permission serves to eliminate logically rows from one or more sets, more rows can appear to be exceptions when RCAC is applied, as shown in Figure 6-13. Also, because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.073486328125, 322.1929931640625, 544.3384399414062, 368.567138671875], "page": 112, "span": [0, 343], "__ref_s3_data": null}]}, {"text": "Figure 6-13 Exception join with RCAC permission", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.24554443359375, 60.59693908691406, 339.181640625, 69.93528747558594], "page": 112, "span": [0, 47], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/99"}, {"text": "96", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.30990600585938, 27.93828010559082, 78.4020004272461, 37.535587310791016], "page": 112, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.766407012939453, 334.4214172363281, 37.29237747192383], "page": 112, "span": [0, 54], "__ref_s3_data": null}]}, {"text": 
"6.4 Monitoring, analyzing, and debugging with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.58145904541016, 705.5357666015625, 469.4769287109375, 721.6285400390625], "page": 113, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "It is assumed (and it is a critical success factor) that the database engineer or application developer has a thorough understanding of the DB2 for i Query Optimizer, Database Engine, and all the associated tools and techniques.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05673217773438, 655.0875854492188, 547.2247314453125, 689.4639892578125], "page": 113, "span": [0, 228], "__ref_s3_data": null}]}, {"text": "The monitoring, analyzing, and debugging process basically stays the same when RCAC row permissions or column masks are in place, with a few important differences:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.80198669433594, 621.1078491210938, 547.1968383789062, 643.4088134765625], "page": 113, "span": [0, 163], "__ref_s3_data": null}]}, {"text": "GLYPH The underlying data access plan can be different and more complex based on the rule text.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.58511352539062, 592.2996215820312, 534.2526245117188, 614.6841430664062], "page": 113, "span": [0, 105], "__ref_s3_data": null}]}, {"text": "GLYPH The database results can be reduced or modified based on the rule text and user profile.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.75184631347656, 575.1990966796875, 541.5543212890625, 585.4160766601562], "page": 113, "span": [0, 104], "__ref_s3_data": null}]}, {"text": "GLYPH The run time of the request can be affected either positively or negatively based on the rule text.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.37033081054688, 546.2804565429688, 536.0465087890625, 
568.6742553710938], "page": 113, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "GLYPH For high-level language record level access, query plans must be considered, and not just program code.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5784149169922, 516.7872314453125, 547.224609375, 539.462890625], "page": 113, "span": [0, 119], "__ref_s3_data": null}]}, {"text": "During analyzing and debugging, it is important to account for all of the RCAC definitions for each table or file to understand the logic and corresponding work that is associated with processing the row permissions and column masks. It is also important to realize that, depending on the user profile in effect at run time, the database actions and query results can be different.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09255981445312, 447.2820129394531, 547.2296752929688, 505.3416442871094], "page": 113, "span": [0, 381], "__ref_s3_data": null}]}, {"text": "RCAC is designed and implemented to be transparent to the user. It is possible for user \"Mike\" and user \"Hernando\" to run the exact same query, against the exact same data on the exact same system, and get different result sets. There is no error, no warning, and no indication that RCAC reduced or modified the respective answers that are returned. Furthermore, it is also likely that user \"Mike\" and user \"Hernando\" have different query run times even though it appears that everything is the same for both users. 
The actual query plan contains the RCAC logic, and this additional code path can alter the amount of work that is needed to produce results, based on the user running the query.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.950927734375, 341.0733337402344, 547.2786254882812, 435.3935241699219], "page": 113, "span": [0, 693], "__ref_s3_data": null}]}, {"text": "When monitoring, analyzing, and debugging a database process when RCAC is enabled, it is critical to keep as many of the \"variables\" the same as possible. Use a good scientific process. For example, when re-creating a problem situation running under the same user profile with the same data and under the same conditions, it is almost mandatory. Otherwise, the database behavior and query results can be different.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83775329589844, 271.2449035644531, 547.328369140625, 329.3033142089844], "page": 113, "span": [0, 414], "__ref_s3_data": null}]}, {"text": "To successfully perform monitoring, analyzing, and debugging when RCAC is enabled likely involves changes in the security and data access policies of the organization, and require new responsibilities, authority, and oversight within the data-centric application development community. 
As such, establishing and staffing the position of \"database engineer\" becomes even more important.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03610229492188, 201.1825714111328, 547.2515869140625, 259.4459228515625], "page": 113, "span": [0, 385], "__ref_s3_data": null}]}, {"text": "6.4.1 Query monitoring and analysis tools", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.31684112548828, 168.40090942382812, 325.99066162109375, 181.35137939453125], "page": 113, "span": [0, 41], "__ref_s3_data": null}]}, {"text": "When monitoring and collecting metrics on database requests, DB2 for i provides additional information that indicates row permissions or column masks are being applied. This information is integrated and part of the standard tools, such as Visual Explain, SQL Plan Cache Snapshot, and SQL Performance Monitor.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.78477478027344, 109.23886108398438, 543.2037353515625, 155.49432373046875], "page": 113, "span": [0, 309], "__ref_s3_data": null}]}, {"text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.31317138671875, 27.83702850341797, 523.6287841796875, 37.11963653564453], "page": 113, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "97", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6370849609375, 27.93828010559082, 547.2591552734375, 37.61383819580078], "page": 113, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 6-14 shows how Visual Explain externalizes RCAC.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3003387451172, 710.93408203125, 394.5509033203125, 721.3932495117188], "page": 114, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "Figure 6-14 Visual Explain indicating that RCAC is applied", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.32495880126953, 430.2875671386719, 301.67059326171875, 439.9020690917969], "page": 114, "span": [0, 58], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/100"}, {"text": "Figure 6-15 shows the main dashboard of an SQL Performance Monitor. 
Click Summary .", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.37713623046875, 403.63275146484375, 529.9888916015625, 413.7968444824219], "page": 114, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "Figure 6-15 SQL Performance Monitor", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.45510864257812, 237.89434814453125, 223.10508728027344, 247.1595458984375], "page": 114, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/101"}, {"text": "Figure 6-16 shows the summary of an SQL Performance Monitor with an indication that RCAC is applied.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.24908447265625, 199.04591369628906, 524.7570190429688, 221.4222412109375], "page": 114, "span": [0, 100], "__ref_s3_data": null}]}, {"text": "Figure 6-16 SQL Performance Monitor indicating that RCAC is applied", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.33348846435547, 94.5608901977539, 349.3365173339844, 103.83731079101562], "page": 114, "span": [0, 67], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/102"}, {"text": "98", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.26004028320312, 27.93828010559082, 78.4020004272461, 37.59291458129883], "page": 114, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.39844512939453, 27.74201011657715, 334.4214172363281, 37.295169830322266], "page": 114, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Figure 6-17 shows the statements of an SQL Performance Monitor and how RCAC is externalized.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.0850372314453, 699.2781372070312, 514.509765625, 721.4401245117188], "page": 115, "span": [0, 92], 
"__ref_s3_data": null}]}, {"text": "Figure 6-17 SQL Performance Monitor showing statements and RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.54953002929688, 562.5570068359375, 349.6691589355469, 571.9047241210938], "page": 115, "span": [0, 63], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/103"}, {"text": "When implementing RCAC as part of a comprehensive and pervasive data access control initiative, consider that the database monitoring and analysis tools can collect literal values that are passed as part of SQL statements. These literal values can be viewed as part of the information collected. If any of the literals are based on or are used with masked columns, it is important to review the database engineer's policy for viewing these data elements. For example, supposed that column CUSTOMER_TAX_ID is deemed masked for the database engineer and the CUSTOMER_TAX_ID column is used in a predicate as follows:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9917755126953, 463.65936279296875, 547.1959838867188, 546.1871948242188], "page": 115, "span": [0, 613], "__ref_s3_data": null}]}, {"text": "WHERE CUSTOMER_TAX_ID = '123-45-7890'", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.02809143066406, 445.86871337890625, 321.6575622558594, 456.53887939453125], "page": 115, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "The literal value of '123-45-7890' is visible to the analyst, effectively exposing sensitive information. 
If this is not acceptable, you must implement the SYSPROC.SET_COLUMN_ATTRIBUTE procedure.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9283447265625, 400.6603698730469, 520.1124877929688, 434.6978454589844], "page": 115, "span": [0, 195], "__ref_s3_data": null}]}, {"text": "The SET_COLUMN_ATTRIBUTE procedure sets the SECURE attribute for a column so that variable values that are used for the column cannot be seen in the SQL Performance Monitor, SQL Plan Cache Snapshot, or Visual Explain.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.99343872070312, 354.4087829589844, 547.264404296875, 388.86968994140625], "page": 115, "span": [0, 217], "__ref_s3_data": null}]}, {"text": "6.4.2 Index advisor", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.30596160888672, 322.03729248046875, 184.44000244140625, 334.6505432128906], "page": 115, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "Because the RCAC rule text can be almost any valid SQL logic, including local selection predicates, join conditions, and subqueries, the standard query tuning techniques still apply. Without a doubt, a proper and adequate indexing strategy is a good starting point.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0715789794922, 274.205810546875, 544.1452026367188, 308.6105651855469], "page": 115, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "The index advisor is not specifically enhanced for RCAC, but because the rule text is a fully integrated part of the query plan, any opportunities for indexing is advised based on the current Query Optimizer functionality. If an index is advised because of the RCAC rule text logic, there is no RCAC reason code provided. 
Analyzing the query plan and the RCAC rule text provides the understanding as to why the index is being advised.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9353485107422, 204.24913024902344, 543.59814453125, 262.8050231933594], "page": 115, "span": [0, 434], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [376.3026428222656, 27.829248428344727, 523.6287841796875, 37.070919036865234], "page": 115, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "99", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.6923828125, 27.93828010559082, 547.2591552734375, 37.61021041870117], "page": 115, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "For example, the query that is shown in Figure 6-18 produces index advice for the user's predicate and the RCAC predicate.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1690673828125, 698.9784545898438, 529.2249145507812, 721.1625366210938], "page": 116, "span": [0, 122], "__ref_s3_data": null}]}, {"text": "Figure 6-18 Index advice and RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.9779510498047, 401.4017639160156, 286.63140869140625, 410.4287109375], "page": 116, "span": [0, 33], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/104"}, {"text": "In Figure 6-19, index advisor is showing an index for the ACCOUNTS and CUSTOMERS tables based on the RCAC rule text.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1080322265625, 362.8584899902344, 530.6282958984375, 384.9075927734375], "page": 116, "span": [0, 116], "__ref_s3_data": null}]}, {"text": "Figure 6-19 Index advisor based on the RCAC rule", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.38304901123047, 225.15142822265625, 271.9134216308594, 234.57513427734375], 
"page": 116, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/105"}, {"text": "For more information about creating and using indexes, see IBM DB2 for i indexing methods and strategies , found at:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2445526123047, 186.40867614746094, 545.009521484375, 208.71290588378906], "page": 116, "span": [0, 116], "__ref_s3_data": null}]}, {"text": "http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i_in dexing_methods_strategies", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0568389892578, 157.3316192626953, 546.534423828125, 179.93490600585938], "page": 116, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "6.4.3 Metadata using catalogs", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.29796600341797, 124.90902709960938, 251.73373413085938, 138.01373291015625], "page": 116, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "To make the discovery and identification of RCAC row permissions and column masks programmatically, query the QSYS2.SYSCONTROLS catalog view or the QSYS2.SYSCONTROLSDEP catalog view directly. 
Otherwise, the System i Navigator Database graphical interface can be used interactively.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81069946289062, 65.56778717041016, 519.360595703125, 112.09072875976562], "page": 116, "span": [0, 281], "__ref_s3_data": null}]}, {"text": "100", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.58773803710938, 27.93828010559082, 83.98200225830078, 37.618370056152344], "page": 116, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.61930084228516, 27.79102897644043, 339.92132568359375, 37.225154876708984], "page": 116, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Figure 6-20 shows the QSYS2.SYSCONTROLS catalog view.", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.2389678955078, 711.0005493164062, 409.46722412109375, 721.5032958984375], "page": 117, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "Figure 6-20 RCAC and catalogs", "type": "paragraph", "name": "paragraph", "font": null, "prov": [{"bbox": [64.65972137451172, 536.8526000976562, 197.23672485351562, 546.6951904296875], "page": 117, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "The SYSCONTROLS catalog view contains the following columns:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.26268005371094, 510.36981201171875, 430.36700439453125, 520.7019653320312], "page": 117, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "GLYPH COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.75210571289062, 493.5987243652344, 229.3837890625, 503.4615173339844], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH CONTROL_TYPE", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.86070251464844, 
481.5989074707031, 231.54153442382812, 491.64453125], "page": 117, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "GLYPH CREATE_TIME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6691436767578, 469.5990905761719, 219.97596740722656, 479.86834716796875], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH ENABLE", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7273712158203, 457.5992736816406, 190.9468536376953, 467.8027648925781], "page": 117, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "GLYPH ENFORCED", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.64234924316406, 445.5994567871094, 207.2530517578125, 455.4942626953125], "page": 117, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "GLYPH ASP_NUMBER", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.51791381835938, 433.5996398925781, 220.03372192382812, 443.4760437011719], "page": 117, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "GLYPH IMPLICIT", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5097198486328, 421.5998229980469, 193.41262817382812, 431.81707763671875], "page": 117, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "GLYPH LABEL", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7074737548828, 409.6000061035156, 182.29428100585938, 419.560302734375], "page": 117, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "GLYPH LAST_ALTERED", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7837677001953, 397.6001892089844, 226.72982788085938, 407.70257568359375], "page": 117, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "GLYPH LONG_COMMENT", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5884246826172, 385.6003723144531, 236.8487548828125, 395.9312744140625], "page": 117, "span": [0, 28], "__ref_s3_data": 
null}]}, {"text": "GLYPH RCAC_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6427001953125, 373.6005554199219, 213.68124389648438, 383.6861877441406], "page": 117, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC_OWNER", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4962921142578, 361.6007385253906, 222.89993286132812, 372.0569763183594], "page": 117, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC_SCHEMA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.67913818359375, 349.6009216308594, 227.64552307128906, 359.7144470214844], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH RULETEXT", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7525634765625, 337.6011047363281, 202.9622802734375, 347.8081970214844], "page": 117, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.67474365234375, 325.6012878417969, 275.5697021484375, 335.66265869140625], "page": 117, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66769409179688, 313.6014709472656, 262.74969482421875, 323.5975341796875], "page": 117, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_TABLE_SCHEMA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.64051818847656, 301.6016540527344, 276.5843505859375, 311.68450927734375], "page": 117, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "GLYPH TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7092742919922, 289.6018371582031, 216.03579711914062, 299.60284423828125], "page": 117, "span": [0, 26], "__ref_s3_data": null}]}, {"text": "GLYPH TABLE_SCHEMA", "type": "paragraph", "name": 
"List-item", "font": null, "prov": [{"bbox": [135.76608276367188, 277.6020202636719, 230.14419555664062, 287.8016052246094], "page": 117, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "GLYPH TBCORRELATION", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6082763671875, 265.6022033691406, 235.10260009765625, 275.61505126953125], "page": 117, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "The SYSCONTROLSDEP catalog view contains the following columns:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.31414794921875, 242.9756622314453, 451.0119934082031, 253.38050842285156], "page": 117, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60992431640625, 226.60281372070312, 229.24020385742188, 236.86697387695312], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH CONTROL_TYPE", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.8683319091797, 214.60301208496094, 231.54153442382812, 224.8516845703125], "page": 117, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "GLYPH IASP_NUMBER", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.71621704101562, 202.60321044921875, 222.8195343017578, 212.88685607910156], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH OBJECT_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.70372009277344, 190.60340881347656, 225.03065490722656, 201.20953369140625], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH OBJECT_SCHEMA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7957763671875, 178.60360717773438, 239.2458953857422, 189.06430053710938], "page": 117, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "GLYPH OBJECT_TYPE", "type": "paragraph", "name": "List-item", "font": null, 
"prov": [{"bbox": [135.60643005371094, 166.6038055419922, 222.27175903320312, 177.22776794433594], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH PARM_SIGNATURE", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5543670654297, 154.60400390625, 241.48854064941406, 165.41989135742188], "page": 117, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.70533752441406, 142.6042022705078, 213.68124389648438, 153.0066375732422], "page": 117, "span": [0, 25], "__ref_s3_data": null}]}, {"text": "GLYPH RCAC_SCHEMA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7023162841797, 130.60440063476562, 227.54257202148438, 140.852783203125], "page": 117, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.80105590820312, 118.6045913696289, 262.728271484375, 129.09344482421875], "page": 117, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "GLYPH SYSTEM_TABLE_SCHEMA", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57916259765625, 106.60478210449219, 276.6519470214844, 117.10379791259766], "page": 117, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "For more information, see the IBM i 7.2 DB2 for i SQL Reference Guide , found at:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.38717651367188, 84.58521270751953, 495.9486389160156, 94.74239349365234], "page": 117, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?lang =en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.96670532226562, 55.894439697265625, 546.534423828125, 78.2451400756836], "page": 117, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.9178161621094, 28.03982925415039, 517.9691772460938, 37.072052001953125], "page": 117, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "101", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.4176025390625, 27.93828010559082, 547.2587890625, 37.66716003417969], "page": 117, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "6.5 Views, materialized query tables, and query rewrite with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.68630981445312, 687.0557861328125, 524.18310546875, 721.531005859375], "page": 118, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "This section covers the implications to views, materialized query tables (MQTs), and query rewrite when RCAC is activated on a table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.02333068847656, 648.2785034179688, 538.62841796875, 670.8335571289062], "page": 118, "span": [0, 133], "__ref_s3_data": null}]}, {"text": "6.5.1 Views", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.40193939208984, 615.67724609375, 137.4498748779297, 628.7596435546875], "page": 118, "span": [0, 11], "__ref_s3_data": null}]}, {"text": "Any access to an SQL view that is over one or more tables that have RCAC also have those row permissions and column masking rules applied. If an SQL view has predicates, those are logically ANDed with any search condition that is specified in the permissions that are defined on the underlying tables. The view does not have to project the columns that are referenced by the permissions. 
Figure 6-21 shows an example of a view definition and user query.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9208526611328, 544.2852172851562, 547.2675170898438, 602.7634887695312], "page": 118, "span": [0, 453], "__ref_s3_data": null}]}, {"text": "Figure 6-21 View definition and user query", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.18788146972656, 249.9243621826172, 311.72760009765625, 259.39349365234375], "page": 118, "span": [0, 42], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/106"}, {"text": "102", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.70126342773438, 27.93828010559082, 83.98200225830078, 37.50507354736328], "page": 118, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.75050354003906, 27.767614364624023, 339.8848571777344, 37.27764892578125], "page": 118, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "What the query optimizer plans for and what the database engine runs is shown in the Figure 6-22.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91015625, 698.9927368164062, 519.4752807617188, 721.2984008789062], "page": 119, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "Figure 6-22 Query rewrite with RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.19911193847656, 392.20526123046875, 291.4627990722656, 402.051513671875], "page": 119, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/107"}, {"text": "6.5.2 Materialized query tables", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.50540161132812, 360.3772888183594, 255.48699951171875, 373.21600341796875], "page": 119, "span": [0, 31], "__ref_s3_data": null}]}, 
{"text": "When the query to populate a materialized query table (MQT) is run by the system on either the create table or a refresh table, and one or more source tables have RCAC defined, the row permissions and column masks are ignored. This means that the MQT has all of the data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82192993164062, 312.9986572265625, 547.2784423828125, 347.3133239746094], "page": 119, "span": [0, 271], "__ref_s3_data": null}]}, {"text": "Because the MQT is a copy of the base table data, when a permission is created on the base table, all the related MQTs are altered to have a default row permission. This default permission prevents any of the rows from being directly queried.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.07936096191406, 266.9794616699219, 547.2845458984375, 301.3565673828125], "page": 119, "span": [0, 242], "__ref_s3_data": null}]}, {"text": "When a query implicitly uses an MQT, the underlying row permissions and column masks are built into the query that uses the MQT. In order for the MQT to be used for optimization, the MQT must include any columns that are used by the row permissions and column masks.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.00421142578125, 220.8664093017578, 547.2724609375, 255.15774536132812], "page": 119, "span": [0, 266], "__ref_s3_data": null}]}, {"text": "The following example illustrates this scenario:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.21084594726562, 199.00047302246094, 342.15032958984375, 209.267333984375], "page": 119, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "1. 
Create schema and tables:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80001831054688, 181.96090698242188, 270.4134826660156, 191.80789184570312], "page": 119, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "CREATE SCHEMA Schema1;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20018005371094, 165.2699432373047, 266.09869384765625, 174.04469299316406], "page": 119, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "CREATE TABLE Schema1.employee(userID varchar(128), LocationID integer, Regionid integer);", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.79075622558594, 141.2703399658203, 547.2555541992188, 163.08201599121094], "page": 119, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "CREATE TABLE Schema1.Sales (INVOICE INTEGER NOT NULL, SALEAMT DECIMAL(5,2), TAXAMT DECIMAL(5,2), LOCATIONID INTEGER, REGIONID INTEGER);", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.46888732910156, 117.27072143554688, 531.0546264648438, 139.3306121826172], "page": 119, "span": [0, 135], "__ref_s3_data": null}]}, {"text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.84039306640625, 28.06173324584961, 517.9691772460938, 37.0665397644043], "page": 119, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "103", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.435302734375, 27.93828010559082, 547.2587890625, 37.50189971923828], "page": 119, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "104", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.5824203491211, 27.93828010559082, 83.98200225830078, 37.50394058227539], "page": 120, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.59403228759766, 27.74925994873047, 339.9430236816406, 37.371910095214844], "page": 120, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "2. Create a row permission that allows the employees to see only rows from the region they work in:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.20985412597656, 699.2785034179688, 545.8660888671875, 721.5411376953125], "page": 120, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "/* Create permission that only allows the employees to see rows from the region they work in */ CREATE PERMISSION Schema1.Sales_PERM1 ON schema1.sales FOR ROWS WHERE CURRENT_USER in (SELECT userId FROM schema1.employee E WHERE e.regionid = regionid) ENFORCED FOR ALL ACCESS ENABLE;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [150.23133850097656, 598.3897705078125, 547.1957397460938, 692.9283447265625], "page": 120, "span": [0, 281], "__ref_s3_data": null}]}, {"text": "3. 
Create an MQT to summarize sales by location:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.14837646484375, 580.844482421875, 362.0214538574219, 591.3262939453125], "page": 120, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "-- Create MQT to summarize sales by location -- This has all of the data. The schema1.sales_perm1 predicate was not applied CREATE TABLE Schema1.Location_Sales_MQT as AS (SELECT LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20016479492188, 480.57073974609375, 545.9945678710938, 573.34423828125], "page": 120, "span": [0, 307], "__ref_s3_data": null}]}, {"text": "MAINTAINED BY USER;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.27374267578125, 468.5709228515625, 251.09893798828125, 478.4224548339844], "page": 120, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "4. 
Populate the MQT (permission is not applied):", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.06195068359375, 450.712158203125, 354.3462829589844, 461.3641052246094], "page": 120, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "/* Populate the MQT - Permission not applied here */ REFRESH TABLE Schema1.Location_Sales_MQT", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20016479492188, 422.5517272949219, 416.03656005859375, 443.3263244628906], "page": 120, "span": [0, 93], "__ref_s3_data": null}]}, {"text": "The following query matches Location_Sales_MQT, but it cannot be used because it does not have column regionid, which is needed by the schema1.sales_PERM1 permission:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.42251586914062, 392.75518798828125, 547.1997680664062, 415.1828308105469], "page": 120, "span": [0, 166], "__ref_s3_data": null}]}, {"text": "SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20016479492188, 364.5924987792969, 401.0367736816406, 385.3670959472656], "page": 120, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "5. 
Create an MQT to summarize by region and location:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.57110595703125, 347.032958984375, 385.903564453125, 357.5345153808594], "page": 120, "span": [0, 53], "__ref_s3_data": null}]}, {"text": "-- MQT to summarize by region and location Create table schema1.Region_Location_Sales_MQT as AS (SELECT REGIONID, LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY REGIONID, LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED MAINTAINED BY USER;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [150.25201416015625, 246.57443237304688, 500.9953308105469, 340.11907958984375], "page": 120, "span": [0, 273], "__ref_s3_data": null}]}, {"text": "6. Populate the Region_location_Sales_MQT (permission not applied):", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.15359497070312, 228.8771514892578, 452.1078186035156, 239.49000549316406], "page": 120, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "/* Populate the Region_location_Sales_MQT - Permission not applied here */ Refresh table schema1.Region_Location_Sales_MQT", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.35955810546875, 199.56198120117188, 535.9747924804688, 222.3959503173828], "page": 120, "span": [0, 122], "__ref_s3_data": null}]}, {"text": "The following query can use the Region_location_SALES_MQT because it has REGIONID, which is required for the schema1.sales_PERM1 permission:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.356689453125, 171.1383056640625, 502.06903076171875, 193.5333251953125], "page": 120, "span": [0, 140], "__ref_s3_data": null}]}, {"text": "SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid;", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [151.20018005371094, 142.53627014160156, 401.0367736816406, 163.31082153320312], "page": 120, "span": [0, 
71], "__ref_s3_data": null}]}, {"text": "This example has the following additional implications:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85989379882812, 710.8369750976562, 376.0711669921875, 721.2571411132812], "page": 121, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "GLYPH Users must be prevented from explicitly querying the MQT or a view that is created over it. Those two cases bypass the row permission and column mask rules from the underlying tables.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5341796875, 670.2985229492188, 547.2718505859375, 704.5060424804688], "page": 121, "span": [0, 199], "__ref_s3_data": null}]}, {"text": "GLYPH If the user writes code to update incrementally an MQT, that code must be run from a user that has permission to view all of the rows and all columns in their unmasked state. Otherwise, the MQT contents are not complete and queries that implicitly use the MQT might get wrong results.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.635986328125, 616.6964721679688, 547.3106079101562, 663.2953491210938], "page": 121, "span": [0, 300], "__ref_s3_data": null}]}, {"text": "GLYPH To prevent this, a check constraint can be created to cause an error if masked data was inserted into the MQT.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.70452880859375, 588.2799072265625, 539.1951904296875, 610.5258178710938], "page": 121, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "6.5.3 Query rewrite", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.31158447265625, 555.6721801757812, 184.48561096191406, 568.6481323242188], "page": 121, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "Query rewrite is a technique that the optimizer can use to change the original request to improve performance.", "type": "paragraph", "name": "Text", "font": null, "prov": 
[{"bbox": [136.25848388671875, 520.2984619140625, 527.1223754882812, 542.4752197265625], "page": 121, "span": [0, 110], "__ref_s3_data": null}]}, {"text": "For example, a query that references Table1 might be rewritten to access an MQT over Table1, or it might also be optimized to access only the fields in an index that is defined over Table1 and avoid touching Table1. With RCAC, defining these rewrites can still occur, but the MQT or index also must include all columns that are needed by the row permissions or column masks that are defined on Table1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0530548095703, 450.2796325683594, 547.158935546875, 508.5531311035156], "page": 121, "span": [0, 401], "__ref_s3_data": null}]}, {"text": "As part of adding RCAC, the impact to these potentially significant performance optimizations must be considered. Usage of MQTs or index-only access might be reduced or eliminated by enabling RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1699981689453, 403.54132080078125, 547.3839721679688, 438.06658935546875], "page": 121, "span": [0, 197], "__ref_s3_data": null}]}, {"text": "6.6 RCAC effects on performance and scalability", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.51546478271484, 360.6579895019531, 436.9425048828125, 376.25299072265625], "page": 121, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "As with any discussion that is related to performance and scalability, nothing is certain or guaranteed. There are always many variables that are involved. First, a good foundation of knowledge and skill is required to appreciate fully what is occurring when a database request is handled within an RCAC enabled environment. Implementing the row permission or column masks involves the query optimizer and database engine. 
The process that identifies the rows that you have permission to access is considered a \"query\", and as such a query plan must be formulated. In the case of SQL requests, the RCAC portion of the query is combined with the user's query, much like a query referencing a view.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.87310791015625, 249.67189025878906, 547.3291625976562, 344.3325500488281], "page": 121, "span": [0, 696], "__ref_s3_data": null}]}, {"text": "For native record level access, this RCAC \"query\" is also built and used to test the permission. When a file is opened, the RCAC rule text logic is included, optimized, and run as part of the native read, write, update, or delete operation. The amount of work (and time) required to identify the record based on the user's permission is directly related to the complexity and depth of the logic that is needed to identify the records that can be returned.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.00497436523438, 180.19772338867188, 547.2525634765625, 238.8109130859375], "page": 121, "span": [0, 455], "__ref_s3_data": null}]}, {"text": "A simple example to illustrate this concept is a random read using a keyed logical file (that is, an index). In its purest form, a random read uses two data access methods: index probe (find the key and RRN) and table probe (find the record using RRN). If the RCAC rule text specifies five nested subqueries to determine whether the user has access to the record, this logic must be added to the path. The subquery processing now becomes part of the original \"random read\" request. Instead of two simple I/Os to retrieve the record, there can be a minimum of 12 I/Os to retrieve the same record. 
These I/Os can be done with a result of \"not found\" if the user is not entitled to any of the records.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.76473999023438, 73.70987701416016, 547.2844848632812, 167.78012084960938], "page": 121, "span": [0, 698], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.8880615234375, 28.07004165649414, 517.9691772460938, 37.09161376953125], "page": 121, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "105", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.3849487304688, 27.93828010559082, 547.2587890625, 37.579017639160156], "page": 121, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "106", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.63807678222656, 27.93828010559082, 83.98200225830078, 37.42599105834961], "page": 122, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.7645034790039, 27.739063262939453, 339.9179382324219, 37.333919525146484], "page": 122, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For programs that access records sequentially, in or out of key order, the added RCAC logic can have a profound effect on the performance and scalability. 
Reading the \"next record\" in order is no longer a simple matter of positioning to the next available key, as shown in Figure 6-23.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05857849121094, 674.9677124023438, 543.5155029296875, 721.5306396484375], "page": 122, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "Figure 6-23 Native record access with no RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.3397216796875, 378.350341796875, 333.39794921875, 388.2008972167969], "page": 122, "span": [0, 45], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/108"}, {"text": "Before the record, as identified by the key, is considered available, the RCAC logic must be run. If the record is rejected by RCAC, the next record in sequence that is permissible must be identified. This spinning through the records can take a long time and uses many resources, as shown in Figure 6-24.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0682373046875, 674.9595336914062, 547.295654296875, 721.4103393554688], "page": 123, "span": [0, 305], "__ref_s3_data": null}]}, {"text": "Figure 6-24 Native record level access with RCAC", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.14874267578125, 374.3591613769531, 341.5462951660156, 383.96697998046875], "page": 123, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/109"}, {"text": "After the row permissions and column masks are designed and implemented, adequate performance and scalability testing are recommended.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06741333007812, 335.3708801269531, 525.8615112304688, 357.78802490234375], "page": 123, "span": [0, 134], "__ref_s3_data": null}]}, {"text": "6.7 Exclusive lock to implement RCAC (availability issues)", "type": "subtitle-level-1", "name": "Section-header", 
"font": null, "prov": [{"bbox": [64.41442108154297, 292.18603515625, 510.04888916015625, 308.5864562988281], "page": 123, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "When defining permissions or enabling RCAC, an exclusive lock on the base table is obtained. The impact to other applications depends on the order of create permission and the alter table to activate RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7480926513672, 242.07867431640625, 547.2496948242188, 276.0059814453125], "page": 123, "span": [0, 205], "__ref_s3_data": null}]}, {"text": "Consider the following scenarios:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.33482360839844, 219.52285766601562, 283.20501708984375, 230.26702880859375], "page": 123, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "GLYPH Scenario 1: Adding permissions and RCAC is not enabled on the table:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61669921875, 203.0792999267578, 464.85845947265625, 213.4561004638672], "page": 123, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "-Job 1 reading data from the table (open for input) holds a *SHRRD on the member and a *SHRRD on the data.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.18515014648438, 174.03993225097656, 547.4009399414062, 196.03309631347656], "page": 123, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "-Job 2 adding, updating, or deleting rows from table (open for output) holds a *SHRRD on the member and a *SHRUPD on the data.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.15493774414062, 145.06031799316406, 546.0185546875, 167.4003143310547], "page": 123, "span": [0, 126], "__ref_s3_data": null}]}, {"text": "-Job 4 allocates the object and gets a *SHRRD on the file and a *EXCLRD on the data.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": 
[151.300537109375, 127.89167022705078, 546.8192749023438, 138.0634307861328], "page": 123, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "-Job 3 attempts to add a permission to the table. Permission is added and the pseudo-closed cursors for Job1 and Job 2 are closed. Job 4 still holds the *SHRRD on the file and *EXCLRD on the data.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.9271697998047, 87.04132080078125, 547.1649169921875, 121.13821411132812], "page": 123, "span": [0, 196], "__ref_s3_data": null}]}, {"text": "The net result from Scenario 1 is that you can add permissions without having to end the applications that are reading the base table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.55613708496094, 57.62049102783203, 545.1102905273438, 80.00410461425781], "page": 123, "span": [0, 134], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.8280334472656, 28.034542083740234, 517.9691772460938, 37.04221725463867], "page": 123, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "107", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.3724975585938, 27.93828010559082, 547.2587890625, 37.605224609375], "page": 123, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "GLYPH Scenario 2: Altering a table to activate RCAC requires that all applications using the table be ended. The alter table requires exclusive use of the table.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.501953125, 699.1420288085938, 547.2257080078125, 721.4761352539062], "page": 124, "span": [0, 171], "__ref_s3_data": null}]}, {"text": "GLYPH Scenario 3: Altering the table to activate RCAC before the permissions are added. The alter table requires exclusive use of the table, as in scenario 2. All applications must be ended to perform this alter. 
After the alter is complete, any applications trying to read data do not get any results, and attempts to insert new rows returns the following message:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6831512451172, 645.1512451171875, 547.350341796875, 692.8861694335938], "page": 124, "span": [0, 375], "__ref_s3_data": null}]}, {"text": "SQ20471] INSERT or UPDATE does not satisfy row permissions.", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [150.52105712890625, 629.384521484375, 451.01605224609375, 639.4539184570312], "page": 124, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "To create a permission in this case requires that you end all the applications, unlike scenario 1 where permissions can be added while the applications were active. In this case, the applications must be ended to run the create permission.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.43138122558594, 588.2802734375, 532.7249145507812, 622.7317504882812], "page": 124, "span": [0, 239], "__ref_s3_data": null}]}, {"text": "6.8 Avoiding propagation of masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.41513061523438, 544.8763427734375, 380.354736328125, 561.1818237304688], "page": 124, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "Operations such as insert or update into a table with active column access control can fail if the input data is masked data. 
This can happen when data to be inserted or updated contains the masked value as a result of a SELECT from a table with active column access control.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85028076171875, 494.2587890625, 547.30224609375, 528.1173706054688], "page": 124, "span": [0, 275], "__ref_s3_data": null}]}, {"text": "For example, assume TABLE1 and TABLE2 have active column access control and for insert, selecting data from TABLE2 returns the masked data. The following INSERT returns an error:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.38754272460938, 460.2991638183594, 547.1968383789062, 482.6835632324219], "page": 124, "span": [0, 178], "__ref_s3_data": null}]}, {"text": "INSERT INTO TABLE1 SELECT * FROM TABLE2", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [136.7989959716797, 443.54840087890625, 331.6763000488281, 452.850341796875], "page": 124, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "The masked data that is returned from the SELECT * FROM TABLE2 might not be valid input data for TABLE1 because of data type or column check constraint.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.99356079101562, 408.8760070800781, 533.7767333984375, 431.30462646484375], "page": 124, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "There are two ways to prevent this situation from happening: Define a check constraint or create a before trigger.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01171875, 374.921630859375, 532.6522827148438, 397.3339538574219], "page": 124, "span": [0, 114], "__ref_s3_data": null}]}, {"text": "6.8.1 Check constraint solution", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.31849670410156, 342.6772766113281, 260.1020202636719, 355.5576477050781], "page": 124, "span": [0, 31], "__ref_s3_data": null}]}, {"text": "One way to prevent this 
problem is to define a check constraint.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2911376953125, 319.0434875488281, 416.498779296875, 329.2791442871094], "page": 124, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "As part of RCAC, new SQL syntax is provided to allow an action to be performed when a violation of the check constraints check condition occurs instead of giving that error. However, if the check condition is still not met after the action, a hard error is returned. A check constraint with the new on-violation-clause is allowed on both the CREATE TABLE and ALTER TABLE statements.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77337646484375, 249.27944946289062, 547.2566528320312, 307.6162109375], "page": 124, "span": [0, 382], "__ref_s3_data": null}]}, {"text": "In the Example 6-4, the mask is defined to return a value of 'XXX-XX-nnnn' for any query that is not done by a user profile in the DBMGR group. The constraint checks that the column SSN does not have the masked value.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9890899658203, 203.26028442382812, 547.2803955078125, 237.4848175048828], "page": 124, "span": [0, 217], "__ref_s3_data": null}]}, {"text": "Example 6-4 Check constraint to avoid masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.4429702758789, 181.8981170654297, 277.19195556640625, 191.441650390625], "page": 124, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "CREATE SCHEMA MY_LIB SET SCHEMA MY_LIB CREATE TABLE MY_LIB.EMP_INFO (COL1_name CHAR(10) WITH DEFAULT 'DEFAULT', COL2_ssn CHAR(11) WITH DEFAULT 'DEFAULT') CREATE MASK MASK_ssn ON MY_LIB.EMP_INFO FOR COLUMN COL2_ssn RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBMGR' ) = 1 THEN COL2_ssn", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [64.3324966430664, 54.632633209228516, 414.59515380859375, 
173.83340454101562], "page": 124, "span": [0, 297], "__ref_s3_data": null}]}, {"text": "108", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.66724395751953, 27.93828010559082, 83.98200225830078, 37.50567626953125], "page": 124, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.35574340820312, 27.725317001342773, 339.9448547363281, 37.34313201904297], "page": 124, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "ELSE 'XXX-XX-'||SUBSTR(COL2_ssn,8,4) END ENABLE | /* Check constraint for the update and insert.*/ ALTER TABLE MY_LIB.EMP_INFO ADD CONSTRAINT MASK_ssn_preserve CHECK(SUBSTR(COL2_ssn,1,7)<>'XXX-XX-') -- Allow any value other than the mask ON UPDATE VIOLATION PRESERVE COL2_ssn -- Don't update the mask portion of the existing value ON INSERT VIOLATION SET COL2_ssn = DEFAULT -- for insert set this to the default value.", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [63.85445022583008, 599.5280151367188, 545.2154541015625, 721.5641479492188], "page": 125, "span": [0, 418], "__ref_s3_data": null}]}, {"text": "6.8.2 Before trigger solution", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.34185028076172, 563.6049194335938, 240.5440673828125, 576.7313842773438], "page": 125, "span": [0, 29], "__ref_s3_data": null}]}, {"text": "The actions that are described in Example 6-4 on page 108 for ON UPDATE VIOLATION and ON INSERT VIOLATION also can be handled by a before trigger, as shown in Example 6-5.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.21571350097656, 528.1804809570312, 547.3193359375, 550.7018432617188], "page": 125, "span": [0, 171], "__ref_s3_data": null}]}, {"text": "Example 6-5 Before trigger to avoid masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null, 
"prov": [{"bbox": [136.4420166015625, 505.8026428222656, 336.79412841796875, 516.0908203125], "page": 125, "span": [0, 47], "__ref_s3_data": null}]}, {"text": "CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.70103454589844, 380.07110595703125, 508.6725158691406, 500.4696044921875], "page": 125, "span": [0, 298], "__ref_s3_data": null}]}, {"text": "6.9 Triggers and functions (SECURED)", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.51354217529297, 330.2432861328125, 360.91705322265625, 346.4449462890625], "page": 125, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "There are some considerations that must be considered when there are triggers and functions on tables that have RCAC enabled. The purpose of SECURE for triggers and functions is so that a user who is allowed to create a trigger or function is not necessarily able to make it SECURE themselves. This prevents the trigger/function developer from adding code that skims off data that they are not allowed to see.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.73545837402344, 255.84051513671875, 547.2467651367188, 314.6852722167969], "page": 125, "span": [0, 409], "__ref_s3_data": null}]}, {"text": "6.9.1 Triggers", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.3808822631836, 222.538818359375, 151.61126708984375, 235.96595764160156], "page": 125, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "Triggers have access to the data in rows outside of the row permission or column masking. 
An after trigger has access to the new row image after the permission has allowed the update or insert to occur. Therefore, the triggers can potentially change the insert or update image value so that it violates the permission.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.48204040527344, 164.04632568359375, 547.2885131835938, 210.0581817626953], "page": 125, "span": [0, 318], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.8166809082031, 28.073514938354492, 517.9691772460938, 37.06842803955078], "page": 125, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "109", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.3563842773438, 27.93828010559082, 547.2587890625, 37.650997161865234], "page": 125, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "110", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.57964324951172, 27.93828010559082, 83.98200225830078, 37.686649322509766], "page": 126, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.65092468261719, 27.779094696044922, 339.9294738769531, 37.28006362915039], "page": 126, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Any triggers that are defined on a table must be created with an attribute that designates that it is SECURED when RCAC definitions are created or altered for that table, as shown in Example 6-6. The same applies to a view that has an instead of trigger. 
That trigger must be secure at the point RCAC is enabled for any of the underlying tables the view is over.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.88360595703125, 674.990234375, 547.1917114257812, 721.3570556640625], "page": 126, "span": [0, 362], "__ref_s3_data": null}]}, {"text": "Example 6-6 Trigger SECURED", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.5281524658203, 653.42236328125, 269.4378967285156, 663.7034301757812], "page": 126, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "/* Trigger created with the SECURED attribute */ CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.4291229248047, 513.5558471679688, 508.6703796386719, 648.5309448242188], "page": 126, "span": [0, 347], "__ref_s3_data": null}]}, {"text": "6.9.2 Functions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.23265075683594, 476.65728759765625, 166.5321044921875, 489.4510192871094], "page": 126, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "Within a CREATE PERMISSION or CREATE MASK , a function can be called. 
Because that UDF has access to the data before the RCAC rules are applied, the SECURE attribute is required on that function, as shown in Example 6-7.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7004852294922, 429.2786560058594, 547.2664794921875, 463.3634948730469], "page": 126, "span": [0, 220], "__ref_s3_data": null}]}, {"text": "Example 6-7 Specifying SECURED on a function", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.63314819335938, 407.6082763671875, 337.031494140625, 417.7618408203125], "page": 126, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "CREATE PERMISSION SCHEMA.PERM1 ON SCHEMA.TABLE1 FOR ROWS WHERE MY_UDF(CURRENT_USER,COLUMN1) = 1 ENFORCED FOR ALL ACCESS ENABLE; CREATE FUNCTION MY_UDF (INP1 CHAR(32), INP2 INTEGER) Returns INTEGER LANGUAGE SQL CONTAINS SQL SECURED", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [135.52565002441406, 270.5191650390625, 446.8673400878906, 403.1766662597656], "page": 126, "span": [0, 230], "__ref_s3_data": null}]}, {"text": "The SECURED attribute of MY_UDF signifies that the function is considered secure for RCAC. If a function is called from an SQL statement, and references a column in a table that has RCAC, it must be declared as secure. In that case, if the secure function calls other functions, they are not validated to confirm whether they are secure.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83522033691406, 204.99681091308594, 547.2584838867188, 251.3785858154297], "page": 126, "span": [0, 337], "__ref_s3_data": null}]}, {"text": "Consider the following examples:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2251434326172, 183.0125732421875, 282.6751708984375, 193.37486267089844], "page": 126, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "GLYPH Table1 has RCAC defined and enabled. SELECT MY_UDF2(Column2) from schema.table1. 
MY_UDF2 must be created with the SECURED attribute. If MY_UDF2 invokes MY_UDF3, there is no checking to ensure that it is also created with SECURED. NOT SECURED is the default on the create function unless SECURED is explicitly selected.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4008026123047, 108.28175354003906, 547.1887817382812, 176.30027770996094], "page": 126, "span": [0, 334], "__ref_s3_data": null}]}, {"text": "This same rule applies for any function that might be invoked with a masked column specified as an argument.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.5240478515625, 78.86335754394531, 523.39453125, 101.21600341796875], "page": 126, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "GLYPH Table2 column SSN has a column mask that is defined on it. SELECT MY_UDF4(SSN) from table2. Because SSN has a column mask that is defined, MY_UDF4 must be created with the SECURED attribute.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.78050231933594, 682.29833984375, 537.510986328125, 721.4716796875], "page": 127, "span": [0, 206], "__ref_s3_data": null}]}, {"text": "6.10 RCAC is only one part of the solution", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.45398712158203, 638.97607421875, 387.579833984375, 655.1737060546875], "page": 127, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "When designing and implementing RCAC row permissions, special attention should be given to the effectiveness and limitations of controlling data access. Data can be housed in objects other than tables or physical files. 
The role and responsibility of the database user, for example, the database engineer, must be reconciled with their respective authority and access privileges.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8080291748047, 564.271484375, 547.2545776367188, 622.5728759765625], "page": 127, "span": [0, 379], "__ref_s3_data": null}]}, {"text": "Figure 6-25 illustrates that object level security is the first check and that RCAC permissions provide control only on tables and physical files.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1082000732422, 529.473388671875, 544.8680419921875, 551.9183349609375], "page": 127, "span": [0, 146], "__ref_s3_data": null}]}, {"text": "Figure 6-25 Object-level security and RCAC permissions", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.07928466796875, 218.3541717529297, 366.65814208984375, 228.08248901367188], "page": 127, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/110"}, {"text": "To get access to the table and the rows, the user must pass the object level authority test and the RCAC permission test.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.93563842773438, 179.97857666015625, 547.2168579101562, 201.9317626953125], "page": 127, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "The IBM i journal captures the transactional data and places an image of the row in the journal receiver. 
If the user has access to the journal receiver, the row image can be viewed if the user has authority to the journal receiver.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6477508544922, 133.73806762695312, 547.2177124023438, 167.70111083984375], "page": 127, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "Although the SQL Plan Cache data, the SQL Plan Cache Snapshot data, and the SQL Performance Monitor data do not reveal the results of queries, they can show the literal values that are passed along with the SQL statements.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03945922851562, 87.78772735595703, 547.24267578125, 122.0500259399414], "page": 127, "span": [0, 222], "__ref_s3_data": null}]}, {"text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [370.82598876953125, 28.084239959716797, 517.9691772460938, 37.07847213745117], "page": 127, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "111", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.201904296875, 27.93828010559082, 547.2587890625, 37.48694610595703], "page": 127, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "112", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.64434051513672, 27.93828010559082, 83.98200225830078, 37.513153076171875], "page": 128, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.78141784667969, 27.795108795166016, 339.9560546875, 37.278629302978516], "page": 128, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "The ability to monitor, analyze, debug, and tune data-centric applications effectively and efficiently requires some understanding of the underlying data, or at least the attributes of the data. 
The organization must be willing to reconcile the conflicting requirements of \"restricting access to data\", and \"needing access to data\".", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89059448242188, 674.9641723632812, 547.2962646484375, 721.255615234375], "page": 128, "span": [0, 332], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/111"}, {"text": "Chapter 7.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 129, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "7", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 129, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control management", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 481.3484191894531, 547.2606201171875, 538.65869140625], "page": 129, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "After Row and Column Access Control (RCAC) definitions are defined and activated in a database, your management processes must be adjusted to accommodate these new security controls. 
This chapter highlights some of the changes that should be considered.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03298950195312, 409.35833740234375, 530.23193359375, 443.9773254394531], "page": 129, "span": [0, 253], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.26803588867188, 387.5925598144531, 347.4121398925781, 398.0865478515625], "page": 129, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Managing row permissions and column masks", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7023162841797, 370.8629455566406, 356.2835388183594, 381.1903076171875], "page": 129, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "GLYPH Managing tables with row permissions and column masks", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.62911987304688, 358.95751953125, 406.06463623046875, 369.439453125], "page": 129, "span": [0, 69], "__ref_s3_data": null}]}, {"text": "GLYPH Monitoring and auditing function usage", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61477661132812, 346.9558410644531, 323.0509338378906, 357.3504943847656], "page": 129, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.98678970336914, 27.78120994567871, 257.24334716796875, 37.33966064453125], "page": 129, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "113", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.2393188476562, 27.93828010559082, 547.2587890625, 37.526824951171875], "page": 129, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "7.1 Managing row permissions and column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.21974182128906, 702.385498046875, 449.7918701171875, 718.162109375], "page": 130, "span": [0, 45], "__ref_s3_data": null}]}, {"text": "This section focuses on the management of the RCAC row permissions and column masks.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.92994689941406, 675.7836303710938, 541.12109375, 685.9977416992188], "page": 130, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "7.1.1 Source management", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.02565002441406, 643.1517333984375, 228.30335998535156, 656.0827026367188], "page": 130, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "The SQL statements that are used to define row permissions and column masks should be managed with a change management process. Ideally, you already are using a change management process for your database definitions, and that same process can be extended to cover your RCAC definitions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09738159179688, 583.5279541015625, 546.4277954101562, 630.0850219726562], "page": 130, "span": [0, 287], "__ref_s3_data": null}]}, {"text": "If you are using SQL DDL to define your DB2 tables, then you have the option of adding the RCAC definitions to the same source file as the table definition. 
The benefit of this approach is that it keeps all DDL that is related to a table in a single source file. The downside is that if you must re-create only the RCAC definitions and leave the table unchanged, then you must identify and extract only the RCAC definitions from the source file. There are situations where the row permissions and column masks must be changed or re-created without changing the definition of the associated table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.02606201171875, 490.18017578125, 547.2933959960938, 571.9515991210938], "page": 130, "span": [0, 596], "__ref_s3_data": null}]}, {"text": "7.1.2 Modifying definitions", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.02523803710938, 457.12799072265625, 231.4643096923828, 470.1705322265625], "page": 130, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "After RCAC is activated for a table, the row permission and column mask definitions can be re-created to change the data access behavior for that table. Usage of the OR REPLACE clause on the CREATE MASK and CREATE PERMISSION SQL statements simplifies the re-creation process by folding in the deletion of the existing RCAC definition.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.07144165039062, 397.6028137207031, 547.1988525390625, 443.98638916015625], "page": 130, "span": [0, 334], "__ref_s3_data": null}]}, {"text": "This capability makes it easy to change your RCAC definitions as you test the controls with your applications and identify tweaks that must be made to your RCAC implementation. 
However, re-creation of RCAC definitions does require an exclusive lock to be acquired on the table during the process.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.90682983398438, 339.7182312011719, 547.2922973632812, 385.9692687988281], "page": 130, "span": [0, 296], "__ref_s3_data": null}]}, {"text": "7.1.3 Turning on and off", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.17169189453125, 307.0446472167969, 214.5146026611328, 320.0962829589844], "page": 130, "span": [0, 24], "__ref_s3_data": null}]}, {"text": "As described in 3.1.2, \"Enabling and activating RCAC\" on page 16, the SQL ALTER statement can turn on and off row permissions and column masks. The ALTER MASK and A LTER PERMISSION statements allow an individual row permission or column mask to be turned off with the DISABLE option and back on with the ENABLE option. The ALTER TABLE statement can deactivate enforcement of all the row permissions and column masks for a single table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82620239257812, 235.83880615234375, 547.182861328125, 294.0208435058594], "page": 130, "span": [0, 435], "__ref_s3_data": null}]}, {"text": "Important: Although these capabilities make it easy to temporarily turn off RCAC security so that you can make environment or application changes, these processes require an exclusive lock to be obtained on a table. 
Therefore, this activity must be planned carefully to avoid disruptions and outages.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [142.0942840576172, 171.44253540039062, 541.1311645507812, 218.12530517578125], "page": 130, "span": [0, 300], "__ref_s3_data": null}]}, {"text": "7.1.4 Regenerating", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.17559051513672, 134.25454711914062, 183.9399871826172, 147.41159057617188], "page": 130, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "DB2 also can regenerate an existing row permission or column mask. This regenerate option can be useful with more complex RCAC definitions that reference other DB2 objects.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.10853576660156, 99.14614868164062, 547.3272705078125, 121.23512268066406], "page": 130, "span": [0, 172], "__ref_s3_data": null}]}, {"text": "114", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.60557556152344, 27.93828010559082, 83.98200225830078, 37.56919860839844], "page": 130, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.59931182861328, 27.808855056762695, 339.8987731933594, 37.270851135253906], "page": 130, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For example, consider a row permission on an ACCOUNTS table (PERMISSION1_ON_ACCOUNTS). The ACCOUNTS table row permission references and compares columns in the CUSTOMERS table. When the definition of the CUSTOMERS table changes, DB2 does not check to determine whether the change to the CUSTOMERS table breaks the ACCOUNTS table row permission. 
If this table definition change does break the row permission, an error does not surface until an application tries to read rows from the ACCOUNTS table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.86671447753906, 639.279052734375, 547.2010498046875, 721.4535522460938], "page": 131, "span": [0, 498], "__ref_s3_data": null}]}, {"text": "Instead of waiting for an application to detect this error, the REGENERATE option can be used on the ACCOUNTS row permission. The REGENERATE option returns an error if the change in the CUSTOMERS table definition causes the row permission to be invalid. In this way, the row permission can be proactively corrected before an application discovers the error.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.08230590820312, 580.8378295898438, 547.1602172851562, 627.391845703125], "page": 131, "span": [0, 357], "__ref_s3_data": null}]}, {"text": "7.2 Managing tables with row permissions and column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.27880096435547, 537.6162719726562, 536.6239013671875, 553.5975341796875], "page": 131, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "This section examines the object management considerations after RCAC is added to a DB2 table.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0673370361328, 499.2984619140625, 547.2647094726562, 521.5249633789062], "page": 131, "span": [0, 94], "__ref_s3_data": null}]}, {"text": "7.2.1 Save and restore", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.25625610351562, 466.6372985839844, 205.3369598388672, 479.3948059082031], "page": 131, "span": [0, 22], "__ref_s3_data": null}]}, {"text": "Row permissions and column masks are stored in the DB2 table object itself, so they are automatically saved and restored when the DB2 table object is saved and restored. 
Therefore, no adjustments must be made to your database backup process to accommodate RCAC.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.18902587890625, 407.25885009765625, 547.1621704101562, 453.1397399902344], "page": 131, "span": [0, 261], "__ref_s3_data": null}]}, {"text": "Save and restore processing works fine with RCAC if the RCAC definition does not reference other DB2 objects other than the table over which they are defined. When the RCAC definition has dependencies on other DB2 objects, the restore process is much more challenging.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.97389221191406, 360.8598937988281, 547.2257080078125, 395.4291687011719], "page": 131, "span": [0, 268], "__ref_s3_data": null}]}, {"text": "Chapter 7. Row and Column Access Control management", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [283.8978271484375, 27.887859344482422, 518.0120849609375, 37.17605972290039], "page": 131, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "115", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.184326171875, 27.93828010559082, 547.2587890625, 37.493892669677734], "page": 131, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "116", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.5724105834961, 27.93828010559082, 83.98200225830078, 37.497066497802734], "page": 132, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.69707489013672, 27.771995544433594, 340.0185546875, 37.28822326660156], "page": 132, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "For example, assume that the BANKSCHEMA library (which is the system name or short name for the schema long name of BANK_SCHEMA) is saved and restored into a library named BANK_TEST. 
Recall from the example in 7.1.4, \"Regenerating\" on page 114 that the row permission on the ACCOUNTS table references the CUSTOMERS table (\u2026 SELECT C.CUSTOMER_ID FROM CUSTOMERS C \u2026). After the restore operation, the ACCOUNTS row permission still references the CUSTOMERS table in BANK_SCHEMA because DB2 explicitly qualifies all object references when the row permission or column mask is created. The restore processing does not change the explicit qualification from BANK_SCHEMA to BANK_TEST. As a result, the restored ACCOUNTS row permission now depends on DB2 objects residing in a different schema, even though it was not created that way originally. For more details, see Figure 7-1.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94317626953125, 591.0215454101562, 546.4418334960938, 721.4165649414062], "page": 132, "span": [0, 872], "__ref_s3_data": null}]}, {"text": "Figure 7-1 Restoring tables to different schemas", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [135.8966064453125, 325.75177001953125, 333.6893005371094, 335.1720886230469], "page": 132, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/112"}, {"text": "The only way to fix this issue is to re-create the row permission or column mask after the restore operation. Re-creation of the row permission or column mask is required only for definitions that reference other DB2 objects, but it is simpler to re-create all of the RCAC definitions instead of a subset. 
For example, generate the SQL using System i Navigator, clear the \"Schema qualify names for objects\" and select the \"OR REPLACE clause\", and then run the generated script.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9806365966797, 239.0277557373047, 547.2655639648438, 309.4206237792969], "page": 132, "span": [0, 477], "__ref_s3_data": null}]}, {"text": "7.2.2 Table migration", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.11895751953125, 206.32223510742188, 196.41009521484375, 219.43421936035156], "page": 132, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "There are several IBM i CL commands, such as Move Object ( MOVOBJ ), Create Duplicate Object ( CRTDUPOBJ ), and Copy Library ( CPYLIB ), which are used to migrate a table from one library to another one. Often, this migration is done to create different versions of the table that can be used for development or testing purposes.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7648468017578, 146.9532012939453, 538.5225830078125, 193.4559783935547], "page": 132, "span": [0, 329], "__ref_s3_data": null}]}, {"text": "The migration of a table with RCAC has the same challenges as restore processing. 
If the RCAC definition references other DB2 objects, then IBM i CL commands do not change the schema names that are explicitly qualified by the DB2 internal RCAC processing.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84234619140625, 100.9890365600586, 542.6978149414062, 135.3026123046875], "page": 132, "span": [0, 255], "__ref_s3_data": null}]}, {"text": "Again, re-creating the row permission or column mask is the only way to fix the issue of references to DB2 objects in other schemas.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9188232421875, 67.12712097167969, 524.2598876953125, 89.22772216796875], "page": 132, "span": [0, 132], "__ref_s3_data": null}]}, {"text": "7.3 Monitoring and auditing function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.26126861572266, 705.5285034179688, 396.1838684082031, 721.473876953125], "page": 133, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "While establishing proper roles for users, separating duties using function usage IDs, and defining RCAC policies allows you to implement an effective and pervasive data access control scheme. How do you monitor and audit everyone who is involved in the implementation of that scheme? The answer is to use IBM i journaling. 
A special journal that is called QAUDJRN, also known as the audit journal , can provide a record and audit trail of many security relevant events that occur on the system, including RCAC-related events.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9081268310547, 619.106201171875, 546.3292236328125, 689.3731689453125], "page": 133, "span": [0, 526], "__ref_s3_data": null}]}, {"text": "The tasks and operations of security administrators and database engineers who are collaborating can (and should) be effectively monitored and audited to ensure that the organization's data access control and governance policies are in place and enabled. For example, the Database Engineers can be involved in designing and developing functions and triggers that must be secured using the SECURE attribute. Otherwise, without properly securing functions and triggers, the RCAC controls can be bypassed.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.88485717773438, 536.9307250976562, 547.2933349609375, 607.4329833984375], "page": 133, "span": [0, 502], "__ref_s3_data": null}]}, {"text": "A new journal entry type of \"AX\" for journal entry code \"T\" (audit trail) is now used for RCAC. 
More information about the journaling of RCAC operations can be found in the following documents:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82188415527344, 491.26055908203125, 546.2147216796875, 525.3898315429688], "page": 133, "span": [0, 193], "__ref_s3_data": null}]}, {"text": "GLYPH IBM i Version 7.2 Journal Management Guide , found at:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.63038635253906, 474.28076171875, 396.8944396972656, 484.52532958984375], "page": 133, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis .htm?lang=en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.8509979248047, 445.02569580078125, 545.9945678710938, 467.0314025878906], "page": 133, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "GLYPH IBM i Version 7.2 Security Reference Guide , found at:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.46853637695312, 427.678466796875, 387.5917663574219, 437.95831298828125], "page": 133, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.h tm?lang=en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.4048309326172, 399.1193542480469, 546.0806274414062, 421.1822204589844], "page": 133, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "Chapter 7. 
Row and Column Access Control management", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [284.0103454589844, 27.87947654724121, 518.0120849609375, 37.15098190307617], "page": 133, "span": [0, 51], "__ref_s3_data": null}]}, {"text": "117", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.2394409179688, 27.93828010559082, 547.2671508789062, 37.50862121582031], "page": 133, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "118", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.73052215576172, 27.93828010559082, 83.98200225830078, 37.495025634765625], "page": 134, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.7788314819336, 27.781814575195312, 340.0904846191406, 37.32704544067383], "page": 134, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/113"}, {"text": "Chapter 8.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 135, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "Designing and planning for success", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 482.1217956542969, 479.93341064453125, 538.7562866210938], "page": 135, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "Although successfully implementing Row and Column Access Control (RCAC) is based on knowledge and skills, designing and planning are fundamental aspects. 
This chapter describes the need for a deep understanding of the technology, and good design, proper planning, and adequate testing.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.87625122070312, 397.4395751953125, 538.4698486328125, 444.0196228027344], "page": 135, "span": [0, 285], "__ref_s3_data": null}]}, {"text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03575134277344, 375.6953125, 347.4120788574219, 386.0404052734375], "page": 135, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Implementing RCAC with good design and proper planning", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7108612060547, 358.8554992675781, 411.53955078125, 369.3179626464844], "page": 135, "span": [0, 70], "__ref_s3_data": null}]}, {"text": "GLYPH DB2 for i Center of Excellence", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.673095703125, 347.0796813964844, 284.81951904296875, 357.7647705078125], "page": 135, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.9481315612793, 27.7833251953125, 257.24334716796875, 37.34917449951172], "page": 135, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "119", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.1824951171875, 27.93828010559082, 547.2587890625, 37.699867248535156], "page": 135, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "8", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 135, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "8.1 Implementing RCAC with good design and proper planning", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.43937683105469, 702.308837890625, 544.5512084960938, 718.29345703125], "page": 136, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "By using RCAC, the row and column data that is returned to the requester can be controlled and governed by a set of data-centric policies that are defined with SQL and implemented within DB2 for i.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.76779174804688, 652.1786499023438, 544.7098999023438, 685.9956665039062], "page": 136, "span": [0, 197], "__ref_s3_data": null}]}, {"text": "RCAC provides fine-grained access control and is complementary to IBM i object-level security. 
With the new RCAC feature of DB2 for i, the database engineer, in partnership with the data owner and security officer, can ensure that users have access to the data based on their level of authorization and responsibility.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.20762634277344, 593.9513549804688, 545.359375, 640.1348876953125], "page": 136, "span": [0, 318], "__ref_s3_data": null}]}, {"text": "This situation also can include separation of duties, such as allowing the application developers to design and implement the solutions, but restricting them from accessing the production data based on policy. Just because someone writes and owns the program, it does not mean that they have access to all the sensitive data that their program can potentially read.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89334106445312, 524.1407470703125, 547.2506103515625, 581.9553833007812], "page": 136, "span": [0, 365], "__ref_s3_data": null}]}, {"text": "This paper has described the following pervasive power and advantages of RCAC:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.08802795410156, 501.609619140625, 500.5572509765625, 512.1461791992188], "page": 136, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "GLYPH Access can be controlled through simple or sophisticated logic.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.58950805664062, 484.95391845703125, 429.4506530761719, 495.30487060546875], "page": 136, "span": [0, 79], "__ref_s3_data": null}]}, {"text": "GLYPH Virtually no application changes are required.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6150665283203, 473.0511474609375, 351.649169921875, 483.7167053222656], "page": 136, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "GLYPH The implementation of the access policy is part of the DB2 data access layer.", "type": "paragraph", "name": 
"List-item", "font": null, "prov": [{"bbox": [135.59262084960938, 460.7434387207031, 491.7782287597656, 471.5057067871094], "page": 136, "span": [0, 93], "__ref_s3_data": null}]}, {"text": "GLYPH Table data is protected regardless of the interface that is used.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65518188476562, 448.6240234375, 426.24566650390625, 459.2989196777344], "page": 136, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "GLYPH No user is inherently exempted from the access control policies.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66162109375, 436.8657531738281, 433.2464599609375, 447.2756652832031], "page": 136, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "GLYPH Groups of users can share policies and permissions.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.51698303222656, 424.99517822265625, 383.5718688964844, 435.4136657714844], "page": 136, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "A deep understanding of the technology, and proper planning, good design, adequate testing, and monitored deployment are critical for success. This includes the usage of quality assurance testing, and realistic performance and scalability exercises that serve to demonstrate that all of your requirements are being met. As part of the verification process, the usage of in-depth proofs of concepts and proofs of technology are recommended, if not essential. When RCAC is activated, the results of queries can change. 
Anticipating this change and realizing the effects of RCAC before going live are of the utmost importance.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.77777099609375, 330.777099609375, 547.245361328125, 413.263427734375], "page": 136, "span": [0, 623], "__ref_s3_data": null}]}, {"text": "With the ever-growing value of data, and the vast and varied database technology that is available today, it is crucial to have a person or persons on staff who specialize in data-centric design, development, and deployment. This role and responsibility falls on the database engineer. With the availability of DB2 RCAC, the importance of full-time database engineering has grown.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85911560058594, 260.817626953125, 547.19580078125, 319.1590881347656], "page": 136, "span": [0, 380], "__ref_s3_data": null}]}, {"text": "8.2 DB2 for i Center of Excellence", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.4874267578125, 217.91627502441406, 324.01275634765625, 233.66151428222656], "page": 136, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "To further assist you with understanding and implementing RCAC, the DB2 for i Center of Excellence team offers an RCAC education and consulting workshop. 
In addition to knowledge transfer, a working session allows for a review of your data access control requirements, review of the current environment, solution ideation, and high-level solution design.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7983856201172, 142.796875, 533.2318115234375, 201.5377197265625], "page": 136, "span": [0, 354], "__ref_s3_data": null}]}, {"text": "If you are interested in engaging with the DB2 for i Center of Excellence, contact Mike Cain at mcain@us.ibm.com .", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8557586669922, 109.11979675292969, 547.2406005859375, 131.31752014160156], "page": 136, "span": [0, 114], "__ref_s3_data": null}]}, {"text": "120", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.63890075683594, 27.93828010559082, 83.98200225830078, 37.58589172363281], "page": 136, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.74649047851562, 27.775848388671875, 339.86614990234375, 37.33769607543945], "page": 136, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/114"}, {"text": "Appendix A.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [74.4000015258789, 517.019287109375, 115.15289306640625, 523.457275390625], "page": 137, "span": [0, 11], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/115"}, {"text": "Database definitions for the RCAC banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 481.3309326171875, 485.7971496582031, 538.6655883789062], "page": 137, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "This appendix provides the database definitions or DDLs to re-create the Row and Column Access Control (RCAC) 
scenario that is described in Chapter 4, \"Implementing Row and Column Access Control: Banking example\" on page 37. The script that is shown in Example A-1 is the DDL script that is used to implement this example.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.57388305664062, 397.5041198730469, 539.8473510742188, 444.1872253417969], "page": 137, "span": [0, 322], "__ref_s3_data": null}]}, {"text": "Example A-1 DDL script to implement the RCAC banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.50365447998047, 376.32855224609375, 333.3837585449219, 386.06781005859375], "page": 137, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "/* Database Definitions for RCAC Bank Scenario */ /* Schema */ CREATE SCHEMA BANK_SCHEMA FOR SCHEMA BANKSCHEMA ; /* Global Variable */ CREATE VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID VARCHAR( 30) ; LABEL ON VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID IS 'Customer''s log in value passed by web application' ; /* Tables */ CREATE TABLE BANK_SCHEMA.CUSTOMERS ( CUSTOMER_ID FOR COLUMN CUSTO00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_NAME FOR COLUMN CUSTO00002 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_ADDRESS FOR COLUMN CUSTO00003 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_CITY FOR COLUMN CUSTO00004 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_STATE FOR COLUMN CUSTO00005 CHAR(2) CCSID 37 NOT NULL , CUSTOMER_PHONE FOR COLUMN CUSTO00006 CHAR(10) CCSID 37 NOT NULL , CUSTOMER_EMAIL FOR COLUMN CUSTO00007 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_TAX_ID FOR COLUMN CUSTO00008 CHAR(11) CCSID 37 NOT NULL , CUSTOMER_DRIVERS_LICENSE_NUMBER FOR COLUMN CUSTO00012 CHAR(13) CCSID 37 DEFAULT NULL , CUSTOMER_LOGIN_ID FOR COLUMN CUSTO00009 VARCHAR(30) CCSID 37 DEFAULT NULL , CUSTOMER_SECURITY_QUESTION FOR COLUMN CUSTO00010 VARCHAR(100) CCSID 37 DEFAULT NULL ,", "type": "paragraph", "name": "Code", 
"font": null, "prov": [{"bbox": [63.48943328857422, 61.49855422973633, 541.1366577148438, 370.9224853515625], "page": 137, "span": [0, 1229], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.879676818847656, 27.74163246154785, 257.24334716796875, 37.323570251464844], "page": 137, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "121", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.1998291015625, 27.93828010559082, 547.2587890625, 37.750850677490234], "page": 137, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "CUSTOMER_SECURITY_QUESTION_ANSWER FOR COLUMN CUSTO00011 VARCHAR(100) CCSID 37 DEFAULT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.CUSTOMER_ID_PK PRIMARY KEY( CUSTOMER_ID ) ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_UK UNIQUE( CUSTOMER_LOGIN_ID ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_DRIVERS_LICENSE_CHECK CHECK( CUSTOMER_DRIVERS_LICENSE_NUMBER <> '*************' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_DRIVERS_LICENSE_NUMBER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_EMAIL_CHECK CHECK( CUSTOMER_EMAIL <> '****@****' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_EMAIL ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_CHECK CHECK( CUSTOMER_LOGIN_ID <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_LOGIN_ID = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_LOGIN_ID ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_CHECK CHECK( CUSTOMER_SECURITY_QUESTION_ANSWER <> '*****' ) ON INSERT VIOLATION SET 
CUSTOMER_SECURITY_QUESTION_ANSWER = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION_ANSWER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_ANSWER CHECK( CUSTOMER_SECURITY_QUESTION <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_SECURITY_QUESTION = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_TAX_ID_CHECK CHECK( CUSTOMER_TAX_ID <> 'XXX-XX-XXXX' AND SUBSTR ( CUSTOMER_TAX_ID , 1 , 7 ) <> 'XXX-XX-' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_TAX_ID ; CREATE TABLE BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_ID FOR COLUMN CUSTID INTEGER NOT NULL , ACCOUNT_NUMBER FOR COLUMN ACCOUNTNO VARCHAR(50) CCSID 37 NOT NULL , ACCOUNT_NAME FOR COLUMN ACCOUNTNAM CHAR(12) CCSID 37 NOT NULL , ACCOUNT_DATE_OPENED FOR COLUMN OPENDATE DATE DEFAULT CURRENT_DATE , ACCOUNT_DATE_CLOSED FOR COLUMN CLOSEDATE DATE DEFAULT NULL , ACCOUNT_CURRENT_BALANCE FOR COLUMN ACCTBAL DECIMAL(11, 2) NOT NULL DEFAULT 0 , INSERT_TIMESTAMP FOR COLUMN INSDATE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDDATE TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.ACCOUNT_ID_PK PRIMARY KEY( ACCOUNT_ID ) );", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [62.94416046142578, 73.59288024902344, 546.5369873046875, 721.5032348632812], "page": 138, "span": [0, 2754], "__ref_s3_data": null}]}, {"text": "122", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.5491943359375, 27.93828010559082, 83.98200225830078, 37.62962341308594], "page": 138, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": 
"Page-footer", "font": null, "prov": [{"bbox": [98.43730926513672, 27.892166137695312, 340.0867614746094, 37.298946380615234], "page": 138, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_CUSTOMER_ID_FK FOREIGN KEY( CUSTOMER_ID ) REFERENCES BANK_SCHEMA.CUSTOMERS ( CUSTO00001 ) ON DELETE RESTRICT ON UPDATE RESTRICT ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_NUMBER_CHECK CHECK( ACCOUNT_NUMBER <> '*****' ) ON UPDATE VIOLATION PRESERVE ACCOUNT_NUMBER ; CREATE TABLE BANK_SCHEMA.TRANSACTIONS FOR SYSTEM NAME TRANS ( TRANSACTION_ID FOR COLUMN TRANS00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), ACCOUNT_ID INTEGER NOT NULL , TRANSACTION_TYPE FOR COLUMN TRANS00002 CHAR(1) CCSID 37 NOT NULL , TRANSACTION_DATE FOR COLUMN TRANS00003 DATE NOT NULL DEFAULT CURRENT_DATE , TRANSACTION_TIME FOR COLUMN TRANS00004 TIME NOT NULL DEFAULT CURRENT_TIME , TRANSACTION_AMOUNT FOR COLUMN TRANS00005 DECIMAL(11, 2) NOT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.TRANSACTION_ID_PK PRIMARY KEY( TRANSACTION_ID ) ) ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ADD CONSTRAINT BANK_SCHEMA.TRANSACTIONS_ACCOUNT_ID_FK FOREIGN KEY( ACCOUNT_ID ) REFERENCES BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID ) ON DELETE RESTRICT ON UPDATE RESTRICT ; /* Permissions and Masks */ CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR ROWS WHERE ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . 
CUSTOMER_LOGIN_ID ) ) ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_EMAIL_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_EMAIL RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_EMAIL WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_EMAIL ELSE '****@****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_TAX_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_TAX_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [62.803646087646484, 62.641639709472656, 546.5366821289062, 721.3413696289062], "page": 139, "span": [0, 2313], "__ref_s3_data": null}]}, {"text": "Appendix A. Database definitions for the RCAC banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [256.8025207519531, 27.89881134033203, 517.9058227539062, 37.0754508972168], "page": 139, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "123", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.2589111328125, 27.93828010559082, 547.2587890625, 37.56678009033203], "page": 139, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "THEN C . CUSTOMER_TAX_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( C . CUSTOMER_TAX_ID , 8 , 4 ) ) WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_DRIVERS_LICENSE_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_DRIVERS_LICENSE_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER ELSE '*************' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_LOGIN_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_LOGIN_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_LOGIN_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_LOGIN_ID ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION_ANSWER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION_ANSWER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . 
CUSTOMER_SECURITY_QUESTION_ANSWER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ;", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [63.7410888671875, 85.18326568603516, 500.697265625, 720.6396484375], "page": 140, "span": [0, 1998], "__ref_s3_data": null}]}, {"text": "124", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.56851196289062, 27.93828010559082, 83.98200225830078, 37.54389572143555], "page": 140, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.52511596679688, 27.84050178527832, 339.9233093261719, 37.32281494140625], "page": 140, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR ROWS WHERE ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_ACCOUNT_NUMBER_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR COLUMN ACCOUNT_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN A . ACCOUNT_NUMBER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ; CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_TRANSACTIONS ON BANK_SCHEMA.TRANSACTIONS AS T FOR ROWS WHERE ( QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( T . ACCOUNT_ID IN ( SELECT A . ACCOUNT_ID FROM BANK_SCHEMA . ACCOUNTS A WHERE A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ACTIVATE ROW ACCESS CONTROL ; /* END */", "type": "paragraph", "name": "Code", "font": null, "prov": [{"bbox": [63.56539535522461, 192.05337524414062, 530.8372802734375, 721.4015502929688], "page": 141, "span": [0, 1533], "__ref_s3_data": null}]}, {"text": "Appendix A. Database definitions for the RCAC banking example", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [256.91644287109375, 27.90251350402832, 517.9058227539062, 37.15294647216797], "page": 141, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "125", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.2833251953125, 27.93828010559082, 547.2587890625, 37.5584716796875], "page": 141, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "126", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.75247192382812, 27.93828010559082, 83.98200225830078, 37.44638442993164], "page": 142, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.78695678710938, 27.779624938964844, 340.1011047363281, 37.32666778564453], "page": 142, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Related publications", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 695.1260375976562, 299.2008056640625, 718.6505126953125], "page": 143, "span": [0, 20], "__ref_s3_data": null}]}, {"text": "The publications that are listed in 
this section are considered suitable for a more detailed description of the topics that are covered in this paper.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.890625, 637.7537231445312, 530.0675048828125, 660.0474853515625], "page": 143, "span": [0, 150], "__ref_s3_data": null}]}, {"text": "Other publications", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.57539367675781, 594.4069213867188, 205.97418212890625, 610.588623046875], "page": 143, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "These publications are relevant as further information sources:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9392547607422, 567.9268188476562, 413.18115234375, 578.0560913085938], "page": 143, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH IBM DB2 for i indexing methods and strategies white paper:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5774383544922, 550.9959716796875, 414.05657958984375, 561.1710815429688], "page": 143, "span": [0, 74], "__ref_s3_data": null}]}, {"text": "http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i _indexing_methods_strategies", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.6581268310547, 521.37451171875, 545.9945678710938, 543.9347534179688], "page": 143, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "GLYPH IBM i Memo to Users Version 7.2 :", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6566619873047, 505.1793212890625, 299.7695007324219, 515.2197875976562], "page": 143, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.8552703857422, 487.74090576171875, 536.167236328125, 497.69171142578125], "page": 143, "span": [0, 77], 
"__ref_s3_data": null}]}, {"text": "GLYPH IBM i Version 7.2 DB2 for i SQL Reference Guide :", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4134979248047, 471.1599426269531, 371.4087829589844, 481.3167724609375], "page": 143, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?l ang=en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.59320068359375, 442.22589111328125, 545.9945678710938, 464.06427001953125], "page": 143, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "GLYPH IBM i Version 7.2 Journal Management Guide :", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6075439453125, 424.78717041015625, 355.629150390625, 435.43304443359375], "page": 143, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis .htm?lang=en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.7898712158203, 396.0626525878906, 545.9945678710938, 417.7483215332031], "page": 143, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "GLYPH IBM i Version 7.2 Security Reference Guide :", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.41650390625, 378.9584045410156, 346.3712158203125, 389.6202087402344], "page": 143, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.h tm?lang=en", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.42144775390625, 349.67706298828125, 545.9945678710938, 372.0034484863281], "page": 143, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "Online resources", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.59217834472656, 306.89630126953125, 195.1357421875, 322.5559997558594], "page": 143, "span": [0, 16], 
"__ref_s3_data": null}]}, {"text": "These websites are relevant as further information sources:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91726684570312, 280.17840576171875, 399.3615417480469, 290.130126953125], "page": 143, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "GLYPH Database programming topic of the IBM i 7.2 IBM Knowledge Center: http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?l ang=en", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.32025146484375, 234.16583251953125, 545.9945678710938, 273.6405944824219], "page": 143, "span": [0, 168], "__ref_s3_data": null}]}, {"text": "GLYPH Identity Theft Resource Center", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.65589904785156, 216.89540100097656, 287.6542053222656, 227.5020294189453], "page": 143, "span": [0, 46], "__ref_s3_data": null}]}, {"text": "http://www.idtheftcenter.org", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.2787628173828, 199.96951293945312, 291.11822509765625, 209.83056640625], "page": 143, "span": [0, 28], "__ref_s3_data": null}]}, {"text": "GLYPH Ponemon Institute", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.48793029785156, 183.16000366210938, 231.24366760253906, 193.20037841796875], "page": 143, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "http://www.ponemon.org/", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.60269165039062, 165.8961181640625, 266.09869384765625, 176.1220703125], "page": 143, "span": [0, 23], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.9522819519043, 27.784912109375, 257.24334716796875, 37.28429412841797], "page": 143, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "127", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [530.2672119140625, 27.93828010559082, 547.2587890625, 37.63324737548828], "page": 143, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Help from IBM", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.7861328125, 705.8480834960938, 172.86196899414062, 721.7124633789062], "page": 144, "span": [0, 13], "__ref_s3_data": null}]}, {"text": "IBM Support and downloads ibm.com /support IBM Global Services ibm.com /services", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.20310974121094, 623.4984130859375, 262.6285400390625, 689.3955078125], "page": 144, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "128", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.65906524658203, 27.93828010559082, 83.98200225830078, 37.34457015991211], "page": 144, "span": [0, 3], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [98.72059631347656, 27.806060791015625, 339.8825988769531, 37.2719841003418], "page": 144, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Back cover", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [287.2200012207031, 741.251953125, 414.24481201171875, 763.4519653320312], "page": 146, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [27.0, 651.225830078125, 447.3600158691406, 719.8479614257812], "page": 146, "span": [0, 54], "__ref_s3_data": null}]}, 
{"text": "Implement roles and separation of duties", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [26.516427993774414, 524.8208618164062, 127.443603515625, 550.765380859375], "page": 146, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "Leverage row permissions on the database", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [26.451623916625977, 469.1280212402344, 120.283203125, 508.38104248046875], "page": 146, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "Protect columns by defining column masks", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [26.34380340576172, 413.14801025390625, 121.44960021972656, 452.6860656738281], "page": 146, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "This IBM Redpaper publication provides information about the IBM i 7.2 feature of IBM DB2 for i Row and Column Access Control (RCAC). It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [152.0505828857422, 468.4081115722656, 414.084228515625, 550.2308959960938], "page": 146, "span": [0, 464], "__ref_s3_data": null}]}, {"text": "This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. 
A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [152.21202087402344, 403.1996765136719, 414.173828125, 461.3445129394531], "page": 146, "span": [0, 309], "__ref_s3_data": null}]}, {"text": "REDP-5110-00", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [170.9120635986328, 152.3369903564453, 232.1637420654297, 161.34877014160156], "page": 146, "span": [0, 12], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/116"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/117"}, {"text": "INTERNATIONAL TECHNICAL SUPPORT ORGANIZATION", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [466.37554931640625, 489.8393859863281, 559.809326171875, 544.2816772460938], "page": 146, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "BUILDING TECHNICAL INFORMATION BASED ON PRACTICAL EXPERIENCE", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [466.6063537597656, 405.52801513671875, 587.38916015625, 440.42242431640625], "page": 146, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "IBM Redbooks are developed by the IBM International Technical Support Organization. Experts from IBM, Customers and Partners from around the world create timely technical information based on realistic scenarios. 
Specific recommendations are provided to help you implement IT solutions more effectively in your environment.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [466.4356689453125, 250.36593627929688, 587.5205078125, 392.952880859375], "page": 146, "span": [0, 323], "__ref_s3_data": null}]}, {"text": "For more information: ibm.com /redbooks", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [467.00830078125, 190.48809814453125, 570.947998046875, 214.1653289794922], "page": 146, "span": [0, 39], "__ref_s3_data": null}]}], "figures": [{"bounding-box": null, "prov": [{"bbox": [513.4270629882812, 737.29345703125, 586.1854248046875, 765.7447509765625], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [13.370661735534668, 87.77820587158203, 583.5319213867188, 508.59100341796875], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [314.70001220703125, 17.213214874267578, 581.3467407226562, 82.39669799804688], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [79.84638214111328, 696.4725341796875, 142.80374145507812, 720.9906005859375], "page": 3, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.10924530029297, 102.9841079711914, 258.7627258300781, 188.21194458007812], "page": 11, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [309.1800537109375, 608.730224609375, 371.9783020019531, 634.2922973632812], "page": 12, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [310.2225646972656, 416.8152770996094, 327.72900390625, 434.5222473144531], "page": 12, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, 
{"bounding-box": null, "prov": [{"bbox": [142.5128631591797, 288.7432556152344, 251.5660858154297, 416.8715515136719], "page": 13, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [145.66848754882812, 156.9557647705078, 251.9022216796875, 264.6454772949219], "page": 13, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.954833984375, 607.9212036132812, 249.82046508789062, 714.8308715820312], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.70733642578125, 472.09759521484375, 251.4536895751953, 599.3330078125], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.8423614501953, 338.135009765625, 251.25462341308594, 447.3796691894531], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.7443389892578, 223.1272430419922, 249.32614135742188, 329.49169921875], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [141.80172729492188, 68.0157470703125, 250.79905700683594, 177.24551391601562], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [142.55819702148438, 599.4522094726562, 251.8630828857422, 714.5342407226562], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [142.5027618408203, 465.3537902832031, 252.00502014160156, 575.7843017578125], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.604942321777344, 572.5767211914062, 238.58961486816406, 722.0762939453125], "page": 17, "span": [0, 0], "__ref_s3_data": 
null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.91238403320312, 92.01444244384766, 491.48321533203125, 296.0242004394531], "page": 19, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 1-1 All-or-nothing access to the rows of a table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.976806640625, 103.38056945800781, 547.004638671875, 416.27178955078125], "page": 20, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 1-2 Existing row and column controls", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.40062713623047, 568.2523803710938, 238.8854217529297, 722.2219848632812], "page": 23, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.838199615478516, 576.9904174804688, 238.931640625, 721.8954467773438], "page": 29, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [137.5907440185547, 381.64495849609375, 545.1253051757812, 684.1454467773438], "page": 31, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 3-1 CREATE PERMISSION SQL statement", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.6407928466797, 377.9161682128906, 545.9530029296875, 672.5060424804688], "page": 32, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-2 CREATE MASK SQL statement", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.6437530517578, 444.2763977050781, 507.3736267089844, 714.1329345703125], "page": 33, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.75877380371094, 69.6054458618164, 515.210205078125, 291.2005310058594], "page": 33, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-4 ALTER TABLE SQL statement", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.23974609375, 
195.40040588378906, 302.0709533691406, 408.1564025878906], "page": 35, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 3-5 Special registers and adopted authority", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.88389587402344, 421.3089294433594, 227.963134765625, 454.3084716796875], "page": 40, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 3-7 Number of employees", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.74125671386719, 302.87969970703125, 547.218505859375, 490.070068359375], "page": 42, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 3-9 Row permissions that are shown in System i Navigator", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.14590454101562, 622.0357055664062, 547.3065185546875, 696.4837036132812], "page": 44, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3-10 Column masks shown in System i Navigator", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.97254943847656, 145.7752227783203, 530.3119506835938, 364.4224548339844], "page": 44, "span": [0, 65], "__ref_s3_data": null}], "text": "Figure 3-11 Selecting the EMPLOYEES table from System i Navigator", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.05963134765625, 453.9345703125, 547.3814086914062, 684.5645751953125], "page": 45, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 3-12 RCAC enabled on the EMPLOYEES table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [137.14126586914062, 210.01055908203125, 225.73944091796875, 243.2476043701172], "page": 45, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-13 Count of EMPLOYEES by HR", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.89012145996094, 98.30632019042969, 220.545166015625, 130.6866912841797], "page": 45, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 3-14 Count of EMPLOYEES by a manager", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": 
[137.26792907714844, 651.4454956054688, 226.28631591796875, 684.01416015625], "page": 46, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 3-15 Count of EMPLOYEES by an employee", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.83544921875, 540.1837768554688, 228.46414184570312, 571.7149658203125], "page": 46, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 3-16 Count of EMPLOYEES by a consultant", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.2381706237793, 572.0254516601562, 238.3625946044922, 722.2628173828125], "page": 53, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.172607421875, 304.9477844238281, 495.38751220703125, 561.9093017578125], "page": 55, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-1 Internet banking example", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.94985961914062, 490.8873291015625, 451.76129150390625, 664.9793090820312], "page": 58, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-4 Data model of the banking scenario", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.25625610351562, 429.6432189941406, 417.9416198730469, 614.1981201171875], "page": 59, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-6 CUSTOMERS table attributes", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.90573120117188, 486.3748779296875, 533.0628051757812, 660.2156372070312], "page": 60, "span": [0, 59], "__ref_s3_data": null}], "text": "Figure 4-8 Reviewing the constraints on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.9095458984375, 230.82260131835938, 456.8887939453125, 420.815185546875], "page": 60, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 4-9 ACCOUNTS table attributes", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.610107421875, 495.3265686035156, 451.55133056640625, 
684.3562622070312], "page": 62, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-12 TRANSACTIONS table attributes", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.1991729736328, 300.0424499511719, 344.15631103515625, 537.9190673828125], "page": 63, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-15 Application administration", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.1958465576172, 402.66937255859375, 527.11962890625, 684.6141967773438], "page": 64, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-16 Application administration for IBM i", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.93495178222656, 180.69932556152344, 528.1470947265625, 337.62982177734375], "page": 64, "span": [0, 77], "__ref_s3_data": null}], "text": "Figure 4-17 Customizing the Database Security Administrator function usage ID", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.7631378173828, 389.99383544921875, 485.8440246582031, 672.9370727539062], "page": 65, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-18 Customize Access window", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.96632385253906, 208.6089324951172, 483.9786682128906, 321.49188232421875], "page": 65, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-19 Function usage ID Database Security Administrator customized", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.90785217285156, 204.0828094482422, 358.8966369628906, 350.76531982421875], "page": 66, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-21 Creating group profiles", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.19943237304688, 430.79339599609375, 474.2098388671875, 656.1209716796875], "page": 67, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 4-22 Creating group profiles and adding users", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": 
[136.22731018066406, 241.38636779785156, 269.4501037597656, 366.38726806640625], "page": 67, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 4-23 Newly created group profiles", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.93331909179688, 386.9024658203125, 455.8020324707031, 596.5308837890625], "page": 68, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-24 Creating a global variable", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.36389923095703, 83.51372528076172, 546.5014038085938, 298.5506896972656], "page": 68, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.95562744140625, 553.2886352539062, 346.0622863769531, 672.4024047851562], "page": 69, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.1831817626953, 216.0868377685547, 547.692138671875, 488.07440185546875], "page": 69, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.78514099121094, 371.8719177246094, 467.8066711425781, 630.3035888671875], "page": 70, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-28 Selecting new row permissions", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.4442901611328, 264.9051513671875, 508.3151550292969, 533.2034301757812], "page": 71, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 4-29 New row permissions on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.61784362792969, 209.0849609375, 540.3091430664062, 520.9561767578125], "page": 72, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 4-30 New row 
permissions on the ACCOUNTS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.03852844238281, 80.2489013671875, 536.1629638671875, 444.4478759765625], "page": 73, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-31 New row permissions on the TRANSACTIONS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.7589111328125, 520.03076171875, 545.423095703125, 672.0924072265625], "page": 74, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 4-32 List of row permissions on BANK_SCHEMA", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.82342529296875, 222.84324645996094, 457.5223388671875, 396.602783203125], "page": 74, "span": [0, 34], "__ref_s3_data": null}], "text": "Figure 4-33 Creating a column mask", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.43399047851562, 188.49737548828125, 533.2398681640625, 602.9105834960938], "page": 75, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-34 Defining a column mask on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.82819747924805, 610.9564819335938, 547.4437255859375, 684.7066040039062], "page": 76, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-35 List of column masks on BANK_SCHEMA", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.8809814453125, 322.705810546875, 544.9537963867188, 430.2528991699219], "page": 76, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-36 Definition of the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.15058898925781, 191.338623046875, 546.9588623046875, 258.4830322265625], "page": 76, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-37 Adding a check constraint", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.886878967285156, 141.33029174804688, 543.0753173828125, 622.1634521484375], "page": 77, "span": [0, 68], "__ref_s3_data": null}], "text": "Figure 
4-38 Specifying a new check constraint on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.629364013671875, 407.89910888671875, 547.2354125976562, 684.9134521484375], "page": 78, "span": [0, 51], "__ref_s3_data": null}], "text": "Figure 4-39 Check constraint on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.99116897583008, 192.2274627685547, 543.6686401367188, 330.8082580566406], "page": 78, "span": [0, 60], "__ref_s3_data": null}], "text": "Figure 4-40 List of check constraints on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.2755584716797, 458.7856140136719, 546.0454711914062, 618.2598876953125], "page": 79, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-41 Enabling RCAC on the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.9549560546875, 218.64797973632812, 534.6990356445312, 382.1351623535156], "page": 79, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-42 Enabling RCAC on ACCOUNTS", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.10528564453125, 515.156494140625, 547.2327880859375, 672.4461059570312], "page": 80, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-43 Enabling RCAC on TRANSACTIONS", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.80467987060547, 210.67916870117188, 546.7496948242188, 379.74896240234375], "page": 80, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-44 Row permissions after enabling RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.10069274902344, 553.5065307617188, 467.5866394042969, 684.43359375], "page": 81, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-45 Selecting row permission definition", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.4017105102539, 196.437255859375, 502.2218017578125, 464.38470458984375], "page": 81, "span": [0, 63], 
"__ref_s3_data": null}], "text": "Figure 4-46 Search condition of the QIBM_DEFAULT row permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.19024658203125, 106.22469329833984, 354.0876770019531, 227.25241088867188], "page": 83, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 4-50 SECURITY session user", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.16693115234375, 353.8003845214844, 546.63623046875, 501.63983154296875], "page": 84, "span": [0, 71], "__ref_s3_data": null}], "text": "Figure 4-52 SQL statement that is run by the SECURITY user - no results", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.6452178955078, 572.7899780273438, 389.1011047363281, 684.0476684570312], "page": 85, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.4497833251953, 569.8001098632812, 352.79534912109375, 652.5623779296875], "page": 86, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 4-56 ADMIN session user", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.02879333496094, 117.78546905517578, 354.0118713378906, 207.25164794921875], "page": 87, "span": [0, 32], "__ref_s3_data": null}], "text": "Figure 4-59 WEBUSER session user", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.8178253173828, 570.9151611328125, 547.5137329101562, 672.1438598632812], "page": 88, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.01531982421875, 272.7074890136719, 396.3285217285156, 505.56903076171875], "page": 88, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-61 Viewing the global variable value", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.8096160888672, 107.80245971679688, 
381.8302307128906, 207.7640838623047], "page": 88, "span": [0, 74], "__ref_s3_data": null}], "text": "Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.06761169433594, 293.2961120605469, 400.2455139160156, 655.4537963867188], "page": 92, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-67 Visual Explain with no RCAC enabled", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.07376098632812, 314.4920349121094, 545.792724609375, 672.5439453125], "page": 93, "span": [0, 44], "__ref_s3_data": null}], "text": "Figure 4-68 Visual Explain with RCAC enabled", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.46057891845703, 127.6042709350586, 506.0116271972656, 238.0028076171875], "page": 93, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-69 Index advice with no RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.14990997314453, 556.6690063476562, 508.7397766113281, 672.6431884765625], "page": 94, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 4-70 Index advice with RCAC enabled", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.52901077270508, 567.5162963867188, 238.8768310546875, 722.1326904296875], "page": 95, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.02496337890625, 71.25074005126953, 527.1937866210938, 354.32598876953125], "page": 97, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 5-1 Accidental update with masked values scenario", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.44626235961914, 566.1923828125, 238.71536254882812, 721.9947509765625], "page": 101, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.21511840820312, 262.2598876953125, 413.1637878417969, 536.272216796875], "page": 103, "span": [0, 57], 
"__ref_s3_data": null}], "text": "Figure 6-2 Masking differences between Fieldproc and RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.0615234375, 502.3581237792969, 497.5985412597656, 616.2451782226562], "page": 104, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-3 RCAC and data movement", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.62368774414062, 386.83355712890625, 491.8061828613281, 636.6663208007812], "page": 105, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 6-4 RCAC effects on data movement from SOURCE", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.34181213378906, 380.9286193847656, 499.1208190917969, 636.5926513671875], "page": 106, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-5 RCAC effects on data movement on TARGET", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.50173950195312, 406.5937805175781, 501.6983947753906, 660.2413330078125], "page": 107, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 6-6 RCAC effects on data movement on SOURCE and TARGET", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.9111785888672, 475.1238708496094, 503.0137634277344, 684.5241088867188], "page": 108, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-7 Set A and set B with row permissions", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.1741485595703, 169.27420043945312, 465.0877990722656, 381.4338073730469], "page": 108, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-8 Inner join without RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.10928344726562, 371.35198974609375, 470.06805419921875, 604.3631591796875], "page": 109, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-9 Inner join with RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.9721221923828, 419.20843505859375, 478.57989501953125, 
634.5538940429688], "page": 110, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 6-10 Outer join without RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.8201904296875, 370.0613708496094, 483.43121337890625, 608.5155639648438], "page": 111, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 6-11 Outer join with RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.06724548339844, 397.1817626953125, 484.16094970703125, 635.3245239257812], "page": 112, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-12 Exception join without RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.15040588378906, 72.79753875732422, 485.8376770019531, 308.3733825683594], "page": 112, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-13 Exception join with RCAC permission", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.894283294677734, 442.3710021972656, 546.3536376953125, 696.03125], "page": 114, "span": [0, 58], "__ref_s3_data": null}], "text": "Figure 6-14 Visual Explain indicating that RCAC is applied", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.08663177490234, 249.16531372070312, 546.7911376953125, 389.9174499511719], "page": 114, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-15 SQL Performance Monitor", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.8217658996582, 106.15225982666016, 546.2924194335938, 184.86444091796875], "page": 114, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 6-16 SQL Performance Monitor indicating that RCAC is applied", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.91090393066406, 573.9140625, 547.3914794921875, 684.7344360351562], "page": 115, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 6-17 SQL Performance Monitor showing statements and RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": 
[136.14181518554688, 412.9127197265625, 547.3031616210938, 684.6536254882812], "page": 116, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-18 Index advice and RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.29962921142578, 236.79556274414062, 510.150390625, 348.3416442871094], "page": 116, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-19 Index advisor based on the RCAC rule", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.72079467773438, 260.0303955078125, 491.8398132324219, 529.7687377929688], "page": 118, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-21 View definition and user query", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.38958740234375, 402.56231689453125, 509.0123291015625, 684.0966186523438], "page": 119, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-22 Query rewrite with RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.09100341796875, 390.8775634765625, 505.8097229003906, 660.002197265625], "page": 122, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-23 Native record access with no RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.10159301757812, 386.76556396484375, 513.560791015625, 660.6875610351562], "page": 123, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-24 Native record level access with RCAC", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.8210906982422, 230.38075256347656, 509.4689636230469, 516.169677734375], "page": 127, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 6-25 Object-level security and RCAC permissions", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.58393096923828, 573.9718627929688, 238.72312927246094, 722.0296630859375], "page": 129, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.06692504882812, 338.255859375, 
523.4048461914062, 576.7738037109375], "page": 132, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 7-1 Restoring tables to different schemas", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.37837600708008, 571.7457885742188, 238.6834716796875, 721.9522705078125], "page": 135, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.35563278198242, 567.583984375, 238.68637084960938, 721.7655639648438], "page": 137, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [475.0350341796875, 648.4136962890625, 547.1314697265625, 720.0004272460938], "page": 137, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [485.2432861328125, 737.3182983398438, 566.4600219726562, 766.057373046875], "page": 146, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [474.6000061035156, 602.410400390625, 592.139892578125, 712.2808837890625], "page": 146, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}], "tables": [{"bounding-box": null, "prov": [{"bbox": [134.7429962158203, 76.51283264160156, 549.8472290039062, 660.2257080078125], "page": 5, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 43, "data": [[{"bbox": [136.8000030517578, 650.1383666992188, 172.89404296875, 659.3513793945312], "spans": [[0, 0]], "text": "Notices", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [175.01951599121094, 650.1383666992188, 547.1898193359375, 659.3513793945312], "spans": [[0, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
vii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.79901123046875, 637.6585083007812, 189.86537170410156, 646.8715209960938], "spans": [[1, 0]], "text": "Trademarks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [195.3968505859375, 637.6585083007812, 547.182861328125, 646.8715209960938], "spans": [[1, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . viii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.79901123046875, 615.1588745117188, 279.3973083496094, 624.3718872070312], "spans": [[2, 0]], "text": "DB2 for i Center of Excellence", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [280.6194152832031, 615.1588745117188, 547.1907958984375, 624.3718872070312], "spans": [[2, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ix", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.79901123046875, 592.6592407226562, 172.84423828125, 601.8722534179688], "spans": [[3, 0]], "text": "Preface", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [175.01852416992188, 592.6592407226562, 547.182861328125, 601.8722534179688], "spans": [[3, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
xi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [136.79803466796875, 580.1793823242188, 547.1808471679688, 589.3923950195312], "spans": [[4, 0]], "text": "Authors . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xi", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.79803466796875, 567.6397705078125, 339.18292236328125, 576.852783203125], "spans": [[5, 0]], "text": "Now you can become a published author, too!", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [344.714111328125, 567.6397705078125, 547.1387939453125, 576.852783203125], "spans": [[5, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [136.79803466796875, 555.159912109375, 529.9950561523438, 564.3729248046875], "spans": [[6, 0]], "text": "Comments welcome. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [535.5494995117188, 555.159912109375, 547.1978759765625, 564.3729248046875], "spans": [[6, 1]], "text": "xiii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [136.79806518554688, 542.6800537109375, 284.0286560058594, 551.89306640625], "spans": [[7, 0]], "text": "Stay connected to IBM Redbooks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [289.54449462890625, 542.6800537109375, 547.1211547851562, 551.89306640625], "spans": [[7, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiv", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [136.79806518554688, 520.180419921875, 536.0958862304688, 529.3934326171875], "spans": [[8, 0]], "text": "Chapter 1. Securing and protecting IBM DB2 data . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [541.6468505859375, 520.180419921875, 547.1978149414062, 529.3934326171875], "spans": [[8, 1]], "text": "1", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [136.79808044433594, 508.18060302734375, 549.8472290039062, 517.3936157226562], "spans": [[9, 0]], "text": "1.1 Security fundamentals. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [136.79806518554688, 495.6409606933594, 536.1293334960938, 504.85394287109375], "spans": [[10, 0]], "text": "1.2 Current state of IBM i security . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [541.6611328125, 495.6409606933594, 547.19287109375, 504.85394287109375], "spans": [[10, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [136.79806518554688, 483.16107177734375, 549.8472290039062, 492.3740539550781], "spans": [[11, 0]], "text": "1.3 DB2 for i security controls . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [151.19720458984375, 470.6811828613281, 536.0551147460938, 479.8941650390625], "spans": [[12, 0]], "text": "1.3.1 Existing row and column control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [541.6015014648438, 470.6811828613281, 547.14794921875, 479.8941650390625], "spans": [[12, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [151.19720458984375, 458.14154052734375, 536.080078125, 467.3545227050781], "spans": [[13, 0]], "text": "1.3.2 New controls: Row and Column Access Control. . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [541.635498046875, 458.14154052734375, 547.19091796875, 467.3545227050781], "spans": [[13, 1]], "text": "5", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [136.7970428466797, 435.64190673828125, 536.0908813476562, 444.8548889160156], "spans": [[14, 0]], "text": "Chapter 2. Roles and separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [541.642822265625, 435.64190673828125, 547.1947631835938, 444.8548889160156], "spans": [[14, 1]], "text": "7", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.7970428466797, 423.64208984375, 536.1271362304688, 432.8550720214844], "spans": [[15, 0]], "text": "2.1 Roles . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [541.6658935546875, 423.64208984375, 547.2047119140625, 432.8550720214844], "spans": [[15, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [151.19720458984375, 411.1622009277344, 535.9526977539062, 420.37518310546875], "spans": [[16, 0]], "text": "2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [541.5558471679688, 411.1622009277344, 547.1590576171875, 420.37518310546875], "spans": [[16, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [151.19720458984375, 398.68231201171875, 536.0410766601562, 407.8952941894531], "spans": [[17, 0]], "text": "2.1.2 Toolbox application server access: QIBM_DB_ZDA. . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [541.595947265625, 398.68231201171875, 547.1508178710938, 407.8952941894531], "spans": [[17, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": [151.19720458984375, 386.1426696777344, 536.0748901367188, 395.35565185546875], "spans": [[18, 0]], "text": "2.1.3 Database Administrator function: QIBM_DB_SQLADM . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [541.6302490234375, 386.1426696777344, 547.1856079101562, 395.35565185546875], "spans": [[18, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [151.19720458984375, 373.66278076171875, 411.2704772949219, 382.8757629394531], "spans": [[19, 0]], "text": "2.1.4 Database Information function: QIBM_DB_SYSMON", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [416.8177490234375, 373.66278076171875, 547.1786499023438, 382.8757629394531], "spans": [[19, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . 9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [151.19720458984375, 361.1828918457031, 536.035888671875, 370.3958740234375], "spans": [[20, 0]], "text": "2.1.5 Security Administrator function: QIBM_DB_SECADM . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [541.5989379882812, 361.1828918457031, 547.1619262695312, 370.3958740234375], "spans": [[20, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": [151.19720458984375, 348.64324951171875, 530.5731811523438, 357.8562316894531], "spans": [[21, 0]], "text": "2.1.6 Change Function Usage CL command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [536.1044311523438, 348.64324951171875, 547.1668701171875, 357.8562316894531], "spans": [[21, 1]], "text": "10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}], [{"bbox": [151.19720458984375, 336.1633605957031, 530.5352172851562, 345.3763427734375], "spans": [[22, 0]], "text": "2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 22, "row-header": false, "row-span": [22, 23]}, {"bbox": [536.0755004882812, 336.1633605957031, 547.156005859375, 345.3763427734375], "spans": [[22, 1]], "text": "10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 22, "row-header": false, "row-span": [22, 23]}], [{"bbox": [136.7970428466797, 323.6834716796875, 547.256591796875, 332.8964538574219], "spans": [[23, 0]], "text": "2.2 Separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 23, "row-header": false, "row-span": [23, 24]}, {"bbox": null, "spans": [[23, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 23, "row-header": false, "row-span": [23, 24]}], [{"bbox": [136.79702758789062, 301.183837890625, 530.5396118164062, 310.3968200683594], "spans": [[24, 0]], "text": "Chapter 3. Row and Column Access Control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 24, "row-header": false, "row-span": [24, 25]}, {"bbox": [536.0916748046875, 301.183837890625, 547.19580078125, 310.3968200683594], "spans": [[24, 1]], "text": "13", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 24, "row-header": false, "row-span": [24, 25]}], [{"bbox": [136.79702758789062, 289.18402099609375, 530.4808959960938, 298.3970031738281], "spans": [[25, 0]], "text": "3.1 Explanation of RCAC and the concept of access control . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 25, "row-header": false, "row-span": [25, 26]}, {"bbox": [536.04248046875, 289.18402099609375, 547.1657104492188, 298.3970031738281], "spans": [[25, 1]], "text": "14", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 25, "row-header": false, "row-span": [25, 26]}], [{"bbox": [151.1971893310547, 276.6443786621094, 378.2078552246094, 285.85736083984375], "spans": [[26, 0]], "text": "3.1.1 Row permission and column mask definitions", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 26, "row-header": false, "row-span": [26, 27]}, {"bbox": [383.74713134765625, 276.6443786621094, 547.15576171875, 285.85736083984375], "spans": [[26, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . 14", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 26, "row-header": false, "row-span": [26, 27]}], [{"bbox": [151.1971893310547, 264.16448974609375, 530.4347534179688, 273.3774719238281], "spans": [[27, 0]], "text": "3.1.2 Enabling and activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 27, "row-header": false, "row-span": [27, 28]}, {"bbox": [535.9962158203125, 264.16448974609375, 547.1190795898438, 273.3774719238281], "spans": [[27, 1]], "text": "16", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 27, "row-header": false, "row-span": [27, 28]}], [{"bbox": [136.79702758789062, 251.6248321533203, 530.528076171875, 260.83782958984375], "spans": [[28, 0]], "text": "3.2 Special registers and built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 28, "row-header": false, "row-span": [28, 29]}, {"bbox": [536.0670166015625, 251.6248321533203, 547.1448364257812, 260.83782958984375], "spans": [[28, 1]], "text": "18", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 28, "row-header": false, "row-span": [28, 29]}], [{"bbox": [151.1971893310547, 239.14495849609375, 530.4978637695312, 248.3579559326172], "spans": [[29, 0]], "text": "3.2.1 Special registers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 29, "row-header": false, "row-span": [29, 30]}, {"bbox": [536.0518798828125, 239.14495849609375, 547.159912109375, 248.3579559326172], "spans": [[29, 1]], "text": "18", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 29, "row-header": false, "row-span": [29, 30]}], [{"bbox": [151.1971893310547, 226.6650848388672, 530.5602416992188, 235.87808227539062], "spans": [[30, 0]], "text": "3.2.2 Built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 30, "row-header": false, "row-span": [30, 31]}, {"bbox": [536.09912109375, 226.6650848388672, 547.1768798828125, 235.87808227539062], "spans": [[30, 1]], "text": "19", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 30, "row-header": false, "row-span": [30, 31]}], [{"bbox": [136.79702758789062, 214.1254425048828, 530.5302734375, 223.33843994140625], "spans": [[31, 0]], "text": "3.3 VERIFY_GROUP_FOR_USER function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 31, "row-header": false, "row-span": [31, 32]}, {"bbox": [536.0615234375, 214.1254425048828, 547.1240234375, 223.33843994140625], "spans": [[31, 1]], "text": "20", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 31, "row-header": false, "row-span": [31, 32]}], [{"bbox": [136.79702758789062, 201.64556884765625, 530.6299438476562, 210.8585662841797], "spans": [[32, 0]], "text": "3.4 Establishing and controlling accessibility by using the RCAC rule text . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 32, "row-header": false, "row-span": [32, 33]}, {"bbox": [536.1631469726562, 201.64556884765625, 547.2295532226562, 210.8585662841797], "spans": [[32, 1]], "text": "21", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 32, "row-header": false, "row-span": [32, 33]}], [{"bbox": [136.79701232910156, 189.1656951904297, 394.78179931640625, 198.37869262695312], "spans": [[33, 0]], "text": "3.5 SELECT, INSERT, and UPDATE behavior with RCAC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 33, "row-header": false, "row-span": [33, 34]}, {"bbox": [400.3206481933594, 189.1656951904297, 547.10009765625, 198.37869262695312], "spans": [[33, 1]], "text": ". . . . . . . . . . . 
. . . . . . . . . . . . . 22", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 33, "row-header": false, "row-span": [33, 34]}], [{"bbox": [136.79701232910156, 176.6260528564453, 530.5651245117188, 185.83905029296875], "spans": [[34, 0]], "text": "3.6 Human resources example . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 34, "row-header": false, "row-span": [34, 35]}, {"bbox": [536.1119995117188, 176.6260528564453, 547.2057495117188, 185.83905029296875], "spans": [[34, 1]], "text": "22", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 34, "row-header": false, "row-span": [34, 35]}], [{"bbox": [151.19717407226562, 164.14617919921875, 530.4913940429688, 173.3591766357422], "spans": [[35, 0]], "text": "3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants. . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 35, "row-header": false, "row-span": [35, 36]}, {"bbox": [536.0463256835938, 164.14617919921875, 547.1561889648438, 173.3591766357422], "spans": [[35, 1]], "text": "23", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 35, "row-header": false, "row-span": [35, 36]}], [{"bbox": [151.19717407226562, 151.6663055419922, 530.5645751953125, 160.87930297851562], "spans": [[36, 0]], "text": "3.6.2 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 36, "row-header": false, "row-span": [36, 37]}, {"bbox": [536.0960083007812, 151.6663055419922, 547.1587524414062, 160.87930297851562], "spans": [[36, 1]], "text": "23", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 36, "row-header": false, "row-span": [36, 37]}], [{"bbox": [151.19717407226562, 139.1266632080078, 530.5569458007812, 148.33966064453125], "spans": [[37, 0]], "text": "3.6.3 Demonstrating data access without RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 37, "row-header": false, "row-span": [37, 38]}, {"bbox": [536.0881958007812, 139.1266632080078, 547.1507568359375, 148.33966064453125], "spans": [[37, 1]], "text": "24", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 37, "row-header": false, "row-span": [37, 38]}], [{"bbox": [151.19717407226562, 126.64678955078125, 530.5341186523438, 135.8597869873047], "spans": [[38, 0]], "text": "3.6.4 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 38, "row-header": false, "row-span": [38, 39]}, {"bbox": [536.072998046875, 126.64678955078125, 547.15087890625, 135.8597869873047], "spans": [[38, 1]], "text": "25", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 38, "row-header": false, "row-span": [38, 39]}], [{"bbox": [151.19717407226562, 114.16690826416016, 339.4510498046875, 123.37991333007812], "spans": [[39, 0]], "text": "3.6.5 Defining and creating column masks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 39, "row-header": false, "row-span": [39, 40]}, {"bbox": [344.9899597167969, 114.16690826416016, 547.160888671875, 123.37991333007812], "spans": [[39, 1]], "text": ". . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . 26", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 39, "row-header": false, "row-span": [39, 40]}], [{"bbox": [151.19717407226562, 101.62727355957031, 530.541015625, 110.84027099609375], "spans": [[40, 0]], "text": "3.6.6 Activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 40, "row-header": false, "row-span": [40, 41]}, {"bbox": [536.087646484375, 101.62727355957031, 547.1808471679688, 110.84027099609375], "spans": [[40, 1]], "text": "28", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 40, "row-header": false, "row-span": [40, 41]}], [{"bbox": [151.19717407226562, 89.14738464355469, 530.5750732421875, 98.36038970947266], "spans": [[41, 0]], "text": "3.6.7 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 41, "row-header": false, "row-span": [41, 42]}, {"bbox": [536.1066284179688, 89.14738464355469, 547.169677734375, 98.36038970947266], "spans": [[41, 1]], "text": "29", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 41, "row-header": false, "row-span": [41, 42]}], [{"bbox": [151.19717407226562, 76.6675033569336, 530.436279296875, 85.88050842285156], "spans": [[42, 0]], "text": "3.6.8 Demonstrating data access with a view and RCAC . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 42, "row-header": false, "row-span": [42, 43]}, {"bbox": [535.9984741210938, 76.6675033569336, 547.1228637695312, 85.88050842285156], "spans": [[42, 1]], "text": "32", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 42, "row-header": false, "row-span": [42, 43]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [132.29698181152344, 56.788063049316406, 547.6959228515625, 721.551025390625], "page": 6, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 49, "data": [[{"bbox": [136.8000030517578, 711.2783203125, 530.5958862304688, 720.4913330078125], "spans": [[0, 0], [0, 1]], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [536.1328125, 711.2783203125, 547.2067260742188, 720.4913330078125], "spans": [[0, 1]], "text": "37", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.80001831054688, 699.2785034179688, 530.5200805664062, 708.4915161132812], "spans": [[1, 0], [1, 1]], "text": "4.1 Business requirements for the RCAC banking scenario . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [536.0591430664062, 699.2785034179688, 547.13720703125, 708.4915161132812], "spans": [[1, 1]], "text": "38", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.80001831054688, 686.7986450195312, 530.5469970703125, 696.0116577148438], "spans": [[2, 0], [2, 1]], "text": "4.2 Description of the users roles and responsibilities . . . . . . . . . . . . . . . . . . 
. . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [536.0863037109375, 686.7986450195312, 547.1648559570312, 696.0116577148438], "spans": [[2, 1]], "text": "39", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.80001831054688, 674.259033203125, 530.5362548828125, 683.4720458984375], "spans": [[3, 0], [3, 1]], "text": "4.3 Implementation of RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [536.0903930664062, 674.259033203125, 547.19873046875, 683.4720458984375], "spans": [[3, 1]], "text": "42", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [151.20018005371094, 661.7791748046875, 400.5744323730469, 670.9921875], "spans": [[4, 0], [4, 1]], "text": "4.3.1 Reviewing the tables that are used in this example", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [406.10546875, 661.7791748046875, 547.14697265625, 670.9921875], "spans": [[4, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . 
42", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [151.20018005371094, 649.29931640625, 516.9255981445312, 658.5123291015625], "spans": [[5, 0], [5, 1]], "text": "4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [522.4603881835938, 649.29931640625, 547.3670654296875, 658.5123291015625], "spans": [[5, 1]], "text": ". . 47", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [151.20018005371094, 636.7597045898438, 530.5675659179688, 645.9727172851562], "spans": [[6, 0], [6, 1]], "text": "4.3.3 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [536.0989379882812, 636.7597045898438, 547.1617431640625, 645.9727172851562], "spans": [[6, 1]], "text": "50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": null, "spans": [[7, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [417.15240478515625, 624.2798461914062, 547.1438598632812, 633.4928588867188], "spans": [[7, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . 52", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [151.20018005371094, 611.7999877929688, 530.5370483398438, 621.0130004882812], "spans": [[8, 0], [8, 1]], "text": "4.3.5 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [536.0759887695312, 611.7999877929688, 547.1538696289062, 621.0130004882812], "spans": [[8, 1]], "text": "54", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [151.20018005371094, 599.2603759765625, 339.45404052734375, 608.473388671875], "spans": [[9, 0], [9, 1]], "text": "4.3.6 Defining and creating column masks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [536.0859985351562, 599.2603759765625, 547.1638793945312, 608.473388671875], "spans": [[9, 1]], "text": "58", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [151.20018005371094, 586.780517578125, 530.5470581054688, 608.473388671875], "spans": [[10, 0], [10, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.7 Restricting the inserting and updating of masked data . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [536.078125, 586.780517578125, 547.15576171875, 595.9935302734375], "spans": [[10, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": null, "spans": [[11, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [536.0916137695312, 514.3016967773438, 547.1978149414062, 523.5147094726562], "spans": [[11, 1]], "text": "79", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [151.20018005371094, 561.7610473632812, 530.4820556640625, 583.513671875], "spans": [[12, 0], [12, 1]], "text": "4.3.8 Activating row and column access control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.9 Reviewing row permissions. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [536.036376953125, 561.7610473632812, 547.14501953125, 570.9740600585938], "spans": [[12, 1]], "text": "64", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [151.20018005371094, 549.2811889648438, 530.44921875, 558.4942016601562], "spans": [[13, 0], [13, 1]], "text": "4.3.10 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [536.0194702148438, 549.2811889648438, 547.1600341796875, 558.4942016601562], "spans": [[13, 1]], "text": "66", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [151.20018005371094, 536.8013305664062, 530.4335327148438, 546.0143432617188], "spans": [[14, 0], [14, 1]], "text": "4.3.11 Query implementation with RCAC activated . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [536.0037231445312, 536.8013305664062, 547.1441040039062, 546.0143432617188], "spans": [[14, 1]], "text": "75", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.80001831054688, 514.3016967773438, 530.5385131835938, 523.5147094726562], "spans": [[15, 0], [15, 1]], "text": "Chapter 5. RCAC and non-SQL interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [536.099609375, 502.3018798828125, 547.1768798828125, 511.5148620605469], "spans": [[15, 1]], "text": "80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [136.80001831054688, 502.3018798828125, 530.5609741210938, 511.5148620605469], "spans": [[16, 0], [16, 1]], "text": "5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [136.80001831054688, 502.3018798828125, 530.5609741210938, 511.5148620605469], "spans": [[16, 0], [16, 1]], "text": "5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [136.80001831054688, 477.2823486328125, 530.4937744140625, 498.9752197265625], "spans": [[17, 0], [17, 1]], "text": "5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [136.80001831054688, 477.2823486328125, 530.4937744140625, 498.9752197265625], "spans": [[17, 0], [17, 1]], "text": "5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": null, "spans": [[18, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [536.0474853515625, 477.2823486328125, 547.1549072265625, 486.4953308105469], "spans": [[18, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [136.80001831054688, 464.8024597167969, 530.5643310546875, 474.01544189453125], "spans": [[19, 0], [19, 1]], "text": "5.4 System CL commands considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [536.0958251953125, 464.8024597167969, 547.158935546875, 474.01544189453125], "spans": [[19, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [151.20018005371094, 452.2628173828125, 530.4598999023438, 461.4757995605469], "spans": [[20, 0], [20, 1]], "text": "5.4.1 Create Duplicate Object (CRTDUPOBJ) command . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [536.0228271484375, 452.2628173828125, 547.148681640625, 461.4757995605469], "spans": [[20, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": null, "spans": [[21, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [536.0770874023438, 439.7829284667969, 547.1549682617188, 448.99591064453125], "spans": [[21, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}], [{"bbox": [151.20018005371094, 427.30303955078125, 530.5381469726562, 448.99591064453125], "spans": [[22, 0], [22, 1]], "text": "5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 22, "row-header": false, "row-span": [22, 23]}, {"bbox": [151.20018005371094, 427.30303955078125, 530.5381469726562, 448.99591064453125], "spans": [[22, 0], [22, 1]], "text": "5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 22, "row-header": false, "row-span": [22, 23]}], [{"bbox": null, "spans": [[23, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 23, "row-header": false, "row-span": [23, 24]}, {"bbox": [536.0574340820312, 427.30303955078125, 547.182861328125, 436.5160217285156], "spans": [[23, 1]], "text": "83", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 23, "row-header": false, "row-span": [23, 24]}], [{"bbox": [136.80001831054688, 404.80340576171875, 530.5385131835938, 414.0163879394531], "spans": [[24, 0], [24, 1]], "text": "Chapter 6. Additional considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 24, "row-header": false, "row-span": [24, 25]}, {"bbox": [530.4888916015625, 142.78761291503906, 547.19970703125, 152.0006103515625], "spans": [[24, 1]], "text": "108", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 24, "row-header": false, "row-span": [24, 25]}], [{"bbox": [136.80003356933594, 380.2639465332031, 530.4944458007812, 389.4769287109375], "spans": [[25, 0], [25, 1]], "text": "6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 25, "row-header": false, "row-span": [25, 26]}, {"bbox": [136.80003356933594, 380.2639465332031, 530.4944458007812, 389.4769287109375], "spans": [[25, 0], [25, 1]], "text": "6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 25, "row-header": false, "row-span": [25, 26]}], [{"bbox": [405.7154541015625, 367.7840576171875, 530.5139770507812, 376.9970397949219], "spans": [[26, 0], [26, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 26, "row-header": false, "row-span": [26, 27]}, {"bbox": [536.0606079101562, 367.7840576171875, 547.15380859375, 376.9970397949219], "spans": [[26, 1]], "text": "88", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 26, "row-header": false, "row-span": [26, 27]}], [{"bbox": [151.2001953125, 367.7840576171875, 400.1688232421875, 376.9970397949219], "spans": [[27, 0], [27, 1]], "text": "6.2.1 Effects when RCAC is defined on the source table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 27, "row-header": false, "row-span": [27, 28]}, {"bbox": [151.2001953125, 367.7840576171875, 400.1688232421875, 376.9970397949219], "spans": [[27, 0], [27, 1]], "text": "6.2.1 Effects when RCAC is defined on the source table", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 27, "row-header": false, "row-span": [27, 28]}], [{"bbox": [136.80003356933594, 330.2846374511719, 530.5997924804688, 351.9775085449219], "spans": [[28, 0], [28, 1]], "text": "6.2.3 Effects when RCAC is defined on both source and target tables . . . . . . . . . . . . . 6.3 RCAC effects on joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 28, "row-header": false, "row-span": [28, 29]}, {"bbox": [536.0498657226562, 330.2846374511719, 547.2177734375, 351.9775085449219], "spans": [[28, 1]], "text": "90 91", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 28, "row-header": false, "row-span": [28, 29]}], [{"bbox": [151.2001953125, 317.80474853515625, 547.2595825195312, 327.0177307128906], "spans": [[29, 0], [29, 1]], "text": "6.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 29, "row-header": false, "row-span": [29, 30]}, {"bbox": [151.2001953125, 317.80474853515625, 547.2595825195312, 327.0177307128906], "spans": [[29, 0], [29, 1]], "text": "6.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 29, "row-header": false, "row-span": [29, 30]}], [{"bbox": [151.20016479492188, 305.2651062011719, 547.2595825195312, 314.47808837890625], "spans": [[30, 0], [30, 1]], "text": "6.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 30, "row-header": false, "row-span": [30, 31]}, {"bbox": [151.20016479492188, 305.2651062011719, 547.2595825195312, 314.47808837890625], "spans": [[30, 0], [30, 1]], "text": "6.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
94", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 30, "row-header": false, "row-span": [30, 31]}], [{"bbox": [151.20016479492188, 292.78521728515625, 530.481201171875, 301.9981994628906], "spans": [[31, 0], [31, 1]], "text": "6.3.3 Exception joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 31, "row-header": false, "row-span": [31, 32]}, {"bbox": [536.051025390625, 292.78521728515625, 547.1907348632812, 301.9981994628906], "spans": [[31, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 31, "row-header": false, "row-span": [31, 32]}], [{"bbox": [136.8000030517578, 280.2455749511719, 372.92724609375, 289.45855712890625], "spans": [[32, 0], [32, 1]], "text": "6.4 Monitoring, analyzing, and debugging with RCAC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 32, "row-header": false, "row-span": [32, 33]}, {"bbox": [536.115966796875, 280.2455749511719, 547.1796264648438, 289.45855712890625], "spans": [[32, 1]], "text": "97", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 32, "row-header": false, "row-span": [32, 33]}], [{"bbox": [378.45904541015625, 280.2455749511719, 530.5841674804688, 289.45855712890625], "spans": [[33, 0], [33, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 33, "row-header": false, "row-span": [33, 34]}, {"bbox": [178.8563232421875, 267.76568603515625, 547.1707763671875, 276.9786682128906], "spans": [[33, 1]], "text": "Query monitoring and analysis tools . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
97", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 33, "row-header": false, "row-span": [33, 34]}], [{"bbox": [151.20016479492188, 255.28578186035156, 530.5306396484375, 264.498779296875], "spans": [[34, 0], [34, 1]], "text": "6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 34, "row-header": false, "row-span": [34, 35]}, {"bbox": [151.20016479492188, 255.28578186035156, 530.5306396484375, 264.498779296875], "spans": [[34, 0], [34, 1]], "text": "6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 34, "row-header": false, "row-span": [34, 35]}], [{"bbox": [151.20013427734375, 242.7461395263672, 525.0111083984375, 251.95913696289062], "spans": [[35, 0], [35, 1]], "text": "6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 35, "row-header": false, "row-span": [35, 36]}, {"bbox": [151.20013427734375, 242.7461395263672, 525.0111083984375, 251.95913696289062], "spans": [[35, 0], [35, 1]], "text": "6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 35, "row-header": false, "row-span": [35, 36]}], [{"bbox": [136.7999725341797, 230.26626586914062, 524.8056640625, 239.47926330566406], "spans": [[36, 0], [36, 1]], "text": "6.5 Views, materialized query tables, and query rewrite with RCAC . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 36, "row-header": false, "row-span": [36, 37]}, {"bbox": [530.3905029296875, 230.26626586914062, 547.14501953125, 239.47926330566406], "spans": [[36, 1]], "text": "102", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 36, "row-header": false, "row-span": [36, 37]}], [{"bbox": [179.0886993408203, 205.2467498779297, 524.8568115234375, 214.45974731445312], "spans": [[37, 0], [37, 1]], "text": "Materialized query tables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 37, "row-header": false, "row-span": [37, 38]}, {"bbox": [530.4345092773438, 205.2467498779297, 547.1676635742188, 214.45974731445312], "spans": [[37, 1]], "text": "103", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 37, "row-header": false, "row-span": [37, 38]}], [{"bbox": [151.20013427734375, 205.2467498779297, 173.510986328125, 214.45974731445312], "spans": [[38, 0], [38, 1]], "text": "6.5.2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 38, "row-header": false, "row-span": [38, 39]}, {"bbox": [530.5247192382812, 192.76687622070312, 547.1878051757812, 201.97987365722656], "spans": [[38, 1]], "text": "105", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 38, "row-header": false, "row-span": [38, 39]}], [{"bbox": [151.20013427734375, 192.76687622070312, 238.92100524902344, 201.97987365722656], "spans": [[39, 0], [39, 1]], "text": "6.5.3 Query rewrite", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 39, "row-header": false, "row-span": [39, 40]}, {"bbox": [151.20013427734375, 192.76687622070312, 238.92100524902344, 201.97987365722656], "spans": [[39, 0], [39, 1]], "text": "6.5.3 Query rewrite", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 
2], "row": 39, "row-header": false, "row-span": [39, 40]}], [{"bbox": [136.7999725341797, 180.28700256347656, 524.9227905273438, 189.5], "spans": [[40, 0], [40, 1]], "text": "6.6 RCAC effects on performance and scalability. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 40, "row-header": false, "row-span": [40, 41]}, {"bbox": [530.4845581054688, 180.28700256347656, 547.1698608398438, 189.5], "spans": [[40, 1]], "text": "105", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 40, "row-header": false, "row-span": [40, 41]}], [{"bbox": null, "spans": [[41, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 41, "row-header": false, "row-span": [41, 42]}, {"bbox": [530.43701171875, 167.7473602294922, 547.1228637695312, 176.96035766601562], "spans": [[41, 1]], "text": "107", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 41, "row-header": false, "row-span": [41, 42]}], [{"bbox": [136.7999725341797, 155.26748657226562, 525.0469970703125, 176.96035766601562], "spans": [[42, 0], [42, 1]], "text": "6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 42, "row-header": false, "row-span": [42, 43]}, {"bbox": [136.7999725341797, 155.26748657226562, 525.0469970703125, 176.96035766601562], "spans": [[42, 0], [42, 1]], "text": "6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 42, "row-header": false, "row-span": [42, 43]}], [{"bbox": null, "spans": [[43, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 43, "row-header": false, "row-span": [43, 44]}, {"bbox": [530.5781860351562, 155.26748657226562, 547.1717529296875, 164.48048400878906], "spans": [[43, 1]], "text": "108", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 43, "row-header": false, "row-span": [43, 44]}], [{"bbox": null, "spans": [[44, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 44, "row-header": false, "row-span": [44, 45]}, {"bbox": [530.43359375, 117.7680892944336, 547.19677734375, 139.46096801757812], "spans": [[44, 1]], "text": "109 109", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 44, "row-header": false, "row-span": [44, 45]}], [{"bbox": null, "spans": [[45, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 45, "row-header": false, "row-span": [45, 46]}, {"bbox": [530.4365234375, 92.74856567382812, 547.2177124023438, 101.9615707397461], "spans": [[45, 1]], "text": "110", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 45, "row-header": false, "row-span": [45, 46]}], [{"bbox": null, "spans": [[46, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 46, "row-header": false, "row-span": [46, 47]}, {"bbox": [530.5514526367188, 57.76904296875, 547.2017211914062, 66.98204803466797], "spans": [[46, 1]], "text": "113", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 46, "row-header": false, "row-span": [46, 47]}], [{"bbox": null, "spans": [[47, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 47, "row-header": false, "row-span": [47, 48]}, {"bbox": [530.3995361328125, 
80.26868438720703, 547.1510009765625, 89.481689453125], "spans": [[47, 1]], "text": "111", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 47, "row-header": false, "row-span": [47, 48]}], [{"bbox": [136.79995727539062, 57.76904296875, 525.0014038085938, 66.98204803466797], "spans": [[48, 0], [48, 1]], "text": "Chapter 7. Row and Column Access Control management . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 48, "row-header": false, "row-span": [48, 49]}, {"bbox": [136.79995727539062, 57.76904296875, 525.0014038085938, 66.98204803466797], "spans": [[48, 0], [48, 1]], "text": "Chapter 7. Row and Column Access Control management . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 48, "row-header": false, "row-span": [48, 49]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [135.956298828125, 482.10968017578125, 547.4403686523438, 721.7639770507812], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 17, "data": [[{"bbox": [136.79989624023438, 711.2779541015625, 524.9435424804688, 720.490966796875], "spans": [[0, 0]], "text": "7.1 Managing row permissions and column masks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [530.4978637695312, 711.2779541015625, 547.1608276367188, 720.490966796875], "spans": [[0, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [151.20005798339844, 698.798095703125, 524.9487915039062, 708.0111083984375], "spans": [[1, 0]], "text": "7.1.1 Source management. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [530.510986328125, 698.798095703125, 547.1976318359375, 708.0111083984375], "spans": [[1, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [151.20005798339844, 686.2584838867188, 524.9796752929688, 695.4714965820312], "spans": [[2, 0]], "text": "7.1.2 Modifying definitions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [530.5341796875, 686.2584838867188, 547.1976928710938, 695.4714965820312], "spans": [[2, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [151.20005798339844, 673.7786254882812, 525.0257568359375, 682.9916381835938], "spans": [[3, 0]], "text": "7.1.3 Turning on and off . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [530.5559692382812, 673.7786254882812, 547.1466064453125, 682.9916381835938], "spans": [[3, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [151.20005798339844, 661.2987670898438, 238.93310546875, 670.5117797851562], "spans": [[4, 0]], "text": "7.1.4 Regenerating", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [244.48696899414062, 661.2987670898438, 547.1724853515625, 670.5117797851562], "spans": [[4, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . 114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.79989624023438, 648.7591552734375, 524.9177856445312, 657.97216796875], "spans": [[5, 0]], "text": "7.2 Managing tables with row permissions and column masks. . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [530.4720458984375, 648.7591552734375, 547.134765625, 657.97216796875], "spans": [[5, 1]], "text": "115", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [151.20005798339844, 636.279296875, 524.933349609375, 645.4923095703125], "spans": [[6, 0]], "text": "7.2.1 Save and restore. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [530.4951782226562, 636.279296875, 547.1807250976562, 645.4923095703125], "spans": [[6, 1]], "text": "115", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [151.20005798339844, 623.7994384765625, 524.9628295898438, 633.012451171875], "spans": [[7, 0]], "text": "7.2.2 Table migration . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [530.5087280273438, 623.7994384765625, 547.1466064453125, 633.012451171875], "spans": [[7, 1]], "text": "116", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [136.79989624023438, 611.2598266601562, 524.9552612304688, 620.4728393554688], "spans": [[8, 0]], "text": "7.3 Monitoring and auditing function usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [530.5089111328125, 611.2598266601562, 547.1697998046875, 620.4728393554688], "spans": [[8, 1]], "text": "117", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [136.79989624023438, 588.7601928710938, 362.6678466796875, 597.9732055664062], "spans": [[9, 0]], "text": "Chapter 8. Designing and planning for success", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [363.9595947265625, 588.7601928710938, 547.1986694335938, 597.9732055664062], "spans": [[9, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . 119", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [136.7998809814453, 576.7603759765625, 416.633056640625, 585.973388671875], "spans": [[10, 0]], "text": "8.1 Implementing RCAC with good design and proper planning", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [422.1871643066406, 576.7603759765625, 547.1546020507812, 585.973388671875], "spans": [[10, 1]], "text": ". . . . . . . . . . . . . . . . . . . 
120", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [136.7998809814453, 564.280517578125, 524.86376953125, 573.4935302734375], "spans": [[11, 0]], "text": "8.2 DB2 for i Center of Excellence . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [530.440185546875, 564.280517578125, 547.1694946289062, 573.4935302734375], "spans": [[11, 1]], "text": "120", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [136.7998809814453, 541.7808837890625, 447.0309753417969, 550.993896484375], "spans": [[12, 0]], "text": "Appendix A. Database definitions for the RCAC banking example", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [447.35968017578125, 541.7808837890625, 547.2036743164062, 550.993896484375], "spans": [[12, 1]], "text": ". . . . . . . . . . . . . . 121", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [136.79989624023438, 519.7612915039062, 234.45175170898438, 528.9743041992188], "spans": [[13, 0]], "text": "Related publications", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [236.15985107421875, 519.7612915039062, 547.1917114257812, 528.9743041992188], "spans": [[13, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [136.79989624023438, 507.2814025878906, 217.75054931640625, 516.494384765625], "spans": [[14, 0]], "text": "Other publications", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [223.33541870117188, 507.2814025878906, 547.258544921875, 516.494384765625], "spans": [[14, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.7999267578125, 494.801513671875, 212.61610412597656, 504.0144958496094], "spans": [[15, 0]], "text": "Online resources", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [218.1934814453125, 494.801513671875, 547.258544921875, 504.0144958496094], "spans": [[15, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [136.7999267578125, 482.2618713378906, 200.54580688476562, 491.474853515625], "spans": [[16, 0]], "text": "Help from IBM", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [206.0924072265625, 482.2618713378906, 547.2077026367188, 491.474853515625], "spans": [[16, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
128", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [75.13301086425781, 558.7689819335938, 487.9241638183594, 590.6500244140625], "page": 10, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 3, "#-rows": 3, "data": [[{"bbox": [75.5999984741211, 581.1570434570312, 111.67109680175781, 589.4819946289062], "spans": [[0, 0]], "text": "AS/400fi", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [236.40029907226562, 581.1571044921875, 259.00469970703125, 589.4820556640625], "spans": [[0, 1]], "text": "IBMfi", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [397.2005920410156, 581.1571655273438, 445.6529541015625, 589.4821166992188], "spans": [[0, 2]], "text": "Redpaper\u2122", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [75.5999984741211, 570.1167602539062, 99.66960144042969, 578.4417114257812], "spans": [[1, 0]], "text": "DB2fi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [236.40029907226562, 570.1168212890625, 307.14569091796875, 578.4417724609375], "spans": [[1, 1]], "text": "Power Systems\u2122", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [397.2005920410156, 570.1168823242188, 455.17950439453125, 578.4418334960938], "spans": [[1, 2]], "text": "Redbooks (log o) fi System", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [75.5999984741211, 559.1367797851562, 107.3051986694336, 567.4617309570312], 
"spans": [[2, 0]], "text": "DRDAfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [236.40029907226562, 559.1368408203125, 283.47210693359375, 567.4617919921875], "spans": [[2, 1]], "text": "Redbooksfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [427.1544189453125, 559.1369018554688, 438.3072204589844, 567.4618530273438], "spans": [[2, 2]], "text": "ifi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [135.74322509765625, 350.148193359375, 545.6257934570312, 502.103515625], "page": 26, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2-1 FUNCTION_USAGE view", "type": "table", "#-cols": 3, "#-rows": 5, "data": [[{"bbox": [142.8000030517578, 487.1369934082031, 202.2449951171875, 495.4620056152344], "spans": [[0, 0]], "text": "Column name", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [216.8087921142578, 487.1369934082031, 257.210693359375, 495.4620056152344], "spans": [[0, 1]], "text": "Data type", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [289.47479248046875, 487.1369934082031, 338.8946838378906, 495.4620056152344], "spans": [[0, 2]], "text": "Description", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [142.8000030517578, 468.1172790527344, 203.2322998046875, 476.4422912597656], "spans": [[1, 0]], "text": "FUNCTION_ID", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [216.785400390625, 468.1172790527344, 
276.00360107421875, 476.4422912597656], "spans": [[1, 1]], "text": "VARCHAR(30)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [289.45770263671875, 468.1172790527344, 359.85394287109375, 476.4422912597656], "spans": [[1, 2]], "text": "ID of the function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [142.8000030517578, 449.156982421875, 198.66929626464844, 457.48199462890625], "spans": [[2, 0]], "text": "USER_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [216.74130249023438, 449.156982421875, 275.9234924316406, 457.48199462890625], "spans": [[2, 1]], "text": "VARCHAR(10)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.382080078125, 438.1166687011719, 515.0535888671875, 457.48199462890625], "spans": [[2, 2]], "text": "Name of the user profile that has a usage setting for this function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [142.79998779296875, 419.1563720703125, 173.98318481445312, 427.48138427734375], "spans": [[3, 0]], "text": "USAGE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [216.773681640625, 419.1563720703125, 270.9797668457031, 427.48138427734375], "spans": [[3, 1]], "text": "VARCHAR(7)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.416259765625, 397.13604736328125, 539.1071166992188, 427.48138427734375], "spans": [[3, 2]], "text": "Usage setting: GLYPH ALLOWED: The user profile is allowed to use the function. 
GLYPH DENIED: The user profile is not allowed to use the function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [142.8000030517578, 378.1163330078125, 196.2248992919922, 386.44134521484375], "spans": [[4, 0]], "text": "USER_TYPE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [216.75210571289062, 378.1163330078125, 270.99871826171875, 386.44134521484375], "spans": [[4, 1]], "text": "VARCHAR(5)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.4316101074219, 356.15631103515625, 448.11962890625, 386.44134521484375], "spans": [[4, 2]], "text": "Type of user profile: GLYPH USER: The user profile is a user. GLYPH GROUP: The user profile is a group.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [135.4901885986328, 202.02511596679688, 547.9369506835938, 295.5169372558594], "page": 26, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 2-1 Query to determine who has authority to define and manage RCAC", "type": "table", "#-cols": 2, "#-rows": 6, "data": [[{"bbox": [136.8000030517578, 279.56719970703125, 171.26956176757812, 288.34197998046875], "spans": [[0, 0]], "text": "SELECT", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [166.78244018554688, 267.5673828125, 251.6985321044922, 288.34197998046875], "spans": [[0, 1]], "text": "function_id, user_name,", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [170.75961303710938, 255.5675811767578, 221.6990203857422, 264.34234619140625], "spans": [[1, 1]], "text": "usage,", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": null, "spans": [[2, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [167.5380859375, 243.56777954101562, 236.6987762451172, 252.342529296875], "spans": [[2, 1]], "text": "user_type", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.8000030517578, 231.56797790527344, 160.59396362304688, 240.3427276611328], "spans": [[3, 0]], "text": "FROM", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [178.43943786621094, 231.56797790527344, 261.7182922363281, 240.3427276611328], "spans": [[3, 1]], "text": "function_usage", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [136.8000030517578, 219.56817626953125, 162.44175720214844, 228.34292602539062], "spans": [[4, 0]], "text": "WHERE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [177.82679748535156, 219.56817626953125, 331.67730712890625, 228.34292602539062], "spans": [[4, 1]], "text": "function_id='QIBM_DB_SECADM'", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.8000030517578, 207.56837463378906, 178.77542114257812, 216.34312438964844], "spans": [[5, 0]], "text": "ORDER BY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [189.269287109375, 
207.56837463378906, 241.73855590820312, 216.34312438964844], "spans": [[5, 1]], "text": "user_name;", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [63.51552963256836, 70.29814147949219, 547.5021362304688, 398.0616455078125], "page": 27, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority", "type": "table", "#-cols": 5, "#-rows": 14, "data": [[{"bbox": [70.80030059814453, 383.1567077636719, 119.78550720214844, 391.4817199707031], "spans": [[0, 0]], "text": "User action", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [424.93804931640625, 304.9800109863281, 433.2629699707031, 344.4774475097656], "spans": [[0, 1]], "text": "*JOBCTL", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [450.1380615234375, 304.9800109863281, 458.4629821777344, 390.3999328613281], "spans": [[0, 2]], "text": "QIBM_DB_SECADM", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [475.9383544921875, 304.9800109863281, 484.2632751464844, 390.465576171875], "spans": [[0, 3]], "text": "QIBM_DB_SQLADM", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [501.13836669921875, 304.9799499511719, 534.7235717773438, 390.385498046875], "spans": [[0, 4]], "text": "QIBM_DB_SYSMON No Authority", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [70.80000305175781, 285.11700439453125, 220.1568145751953, 293.4420166015625], "spans": [[1, 0]], "text": "SET CURRENT DEGREE (SQL statement)", "type": "row_header", 
"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [429.0, 285.11700439453125, 435.00299072265625, 293.4420166015625], "spans": [[1, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [480.00030517578125, 285.11700439453125, 486.0032958984375, 293.4420166015625], "spans": [[1, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80001831054688, 266.1567077636719, 264.5538024902344, 274.4817199707031], "spans": [[2, 0]], "text": "CHGQRYA command targeting a different user's job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.0000305175781, 266.1567077636719, 435.0030212402344, 274.4817199707031], "spans": [[2, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [480.0003356933594, 266.1567077636719, 486.0033264160156, 274.4817199707031], "spans": [[2, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 
3]}], [{"bbox": [70.800048828125, 247.1370086669922, 322.5057373046875, 255.46202087402344], "spans": [[3, 0]], "text": "STRDBMON or ENDDBMON commands targeting a different user's job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.0000305175781, 247.1370086669922, 435.0030212402344, 255.46202087402344], "spans": [[3, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [480.0003356933594, 247.1370086669922, 486.0033264160156, 255.46202087402344], "spans": [[3, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.800048828125, 228.1173095703125, 381.0218505859375, 236.44232177734375], "spans": [[4, 0]], "text": "STRDBMON or ENDDBMON commands targeting a job that matches the current user", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [429.0000305175781, 228.1173095703125, 435.0030212402344, 236.44232177734375], "spans": [[4, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [480.0003356933594, 228.1173095703125, 511.26361083984375, 236.44232177734375], "spans": [[4, 3]], "text": "X X", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [530.7603149414062, 228.1173095703125, 536.7633056640625, 236.44232177734375], "spans": [[4, 4]], "text": "X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.800048828125, 209.15701293945312, 359.5173645019531, 217.48202514648438], "spans": [[5, 0]], "text": "QUSRJOBI() API format 900 or System i Navigator's SQL Details for Job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [429.00006103515625, 209.15701293945312, 435.0030517578125, 217.48202514648438], "spans": [[5, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [480.0003662109375, 209.15701293945312, 486.00335693359375, 217.48202514648438], "spans": [[5, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [505.26068115234375, 209.15701293945312, 511.263671875, 217.48202514648438], "spans": [[5, 4]], "text": "X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80007934570312, 190.13731384277344, 220.7517852783203, 198.4623260498047], "spans": [[6, 0]], "text": "Visual Explain within Run SQL scripts", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [429.00006103515625, 190.13731384277344, 435.0030517578125, 198.4623260498047], "spans": [[6, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 
2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [480.0003662109375, 190.13731384277344, 486.00335693359375, 198.4623260498047], "spans": [[6, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [505.26068115234375, 190.13731384277344, 536.7633666992188, 198.4623260498047], "spans": [[6, 4]], "text": "X X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80007934570312, 171.11761474609375, 236.65480041503906, 179.442626953125], "spans": [[7, 0]], "text": "Visual Explain outside of Run SQL scripts", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [429.00006103515625, 171.11761474609375, 435.0030517578125, 179.442626953125], "spans": [[7, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [480.0003662109375, 171.11761474609375, 486.00335693359375, 179.442626953125], "spans": [[7, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80007934570312, 152.15731811523438, 213.1296844482422, 160.48233032226562], "spans": [[8, 0]], "text": "ANALYZE PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": 
false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [429.00006103515625, 152.15731811523438, 435.0030517578125, 160.48233032226562], "spans": [[8, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [480.0003662109375, 152.15731811523438, 486.00335693359375, 160.48233032226562], "spans": [[8, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80007934570312, 133.1376190185547, 199.87808227539062, 141.46263122558594], "spans": [[9, 0]], "text": "DUMP PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [429.00006103515625, 133.1376190185547, 435.0030517578125, 141.46263122558594], "spans": [[9, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [480.0003662109375, 133.1376190185547, 486.00335693359375, 141.46263122558594], "spans": [[9, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": 
[70.80007934570312, 114.11792755126953, 208.36776733398438, 122.44291687011719], "spans": [[10, 0]], "text": "MODIFY PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [429.00006103515625, 114.11792755126953, 435.0030517578125, 122.44291687011719], "spans": [[10, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [480.0003662109375, 114.11792755126953, 486.00335693359375, 122.44291687011719], "spans": [[10, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [70.80007934570312, 95.09822845458984, 411.20263671875, 103.42323303222656], "spans": [[11, 0]], "text": "MODIFY PLAN CACHE PROPERTIES procedure (currently does not check authority)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 4]], "text": "", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": null, "spans": [[12, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [429.00006103515625, 95.09822845458984, 435.0030517578125, 103.42323303222656], "spans": [[12, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [480.0003662109375, 95.09822845458984, 486.00335693359375, 103.42323303222656], "spans": [[12, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [70.80007934570312, 76.13793182373047, 377.1258544921875, 84.46292877197266], "spans": [[13, 0]], "text": "CHANGE PLAN CACHE SIZE procedure (currently does not check authority)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [429.00006103515625, 76.13793182373047, 435.0030517578125, 84.46292877197266], "spans": [[13, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [480.0003662109375, 76.13793182373047, 486.00335693359375, 84.46292877197266], "spans": [[13, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, 
"col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [63.80680847167969, 222.05039978027344, 547.7899169921875, 720.9105224609375], "page": 28, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 6, "#-rows": 22, "data": [[{"bbox": [70.80136108398438, 706.1392822265625, 119.78656768798828, 714.4642333984375], "spans": [[0, 0]], "text": "User action", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [424.93804931640625, 628.02001953125, 433.262939453125, 667.4706420898438], "spans": [[0, 1]], "text": "*JOBCTL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [450.1380615234375, 628.02001953125, 458.46295166015625, 713.3759765625], "spans": [[0, 2]], "text": "QIBM_DB_SECADM", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [475.9383544921875, 628.02001953125, 484.26324462890625, 713.3759765625], "spans": [[0, 3]], "text": "QIBM_DB_SQLADM", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [501.13836669921875, 628.02001953125, 509.4632568359375, 713.437255859375], "spans": [[0, 4]], "text": "QIBM_DB_SYSMON", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [526.3987426757812, 628.02001953125, 534.7235107421875, 682.131591796875], "spans": [[0, 5]], "text": "No Authority", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, 
"row-span": [0, 1]}], [{"bbox": [70.80060577392578, 608.1573486328125, 278.5827331542969, 616.4822998046875], "spans": [[1, 0]], "text": "START PLAN CACHE EVENT MONITOR procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [429.0005798339844, 608.1573486328125, 435.0035705566406, 616.4822998046875], "spans": [[1, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [480.0008850097656, 608.1573486328125, 486.0038757324219, 616.4822998046875], "spans": [[1, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80059814453125, 589.1376342773438, 269.4494934082031, 597.4625854492188], "spans": [[2, 0]], "text": "END PLAN CACHE EVENT MONITOR procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.0005798339844, 589.1376342773438, 435.0035705566406, 597.4625854492188], "spans": [[2, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [480.0008850097656, 
589.1376342773438, 486.0038757324219, 597.4625854492188], "spans": [[2, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [70.80059814453125, 570.117919921875, 293.976318359375, 578.44287109375], "spans": [[3, 0]], "text": "END ALL PLAN CACHE EVENT MONITORS procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.0005798339844, 570.117919921875, 435.0035705566406, 578.44287109375], "spans": [[3, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [480.0008850097656, 570.117919921875, 486.0038757324219, 578.44287109375], "spans": [[3, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.80059814453125, 551.1575927734375, 311.2257385253906, 559.4825439453125], "spans": [[4, 0]], "text": "Work with RCAC row permissions (Create, modify, or delete)", "type": "row_header", "col": 0, 
"col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [454.50030517578125, 551.1575927734375, 460.5032958984375, 559.4825439453125], "spans": [[4, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.80059814453125, 532.1378784179688, 303.5882873535156, 540.4628295898438], "spans": [[5, 0]], "text": "Work with RCAC column masks (Create, modify, or delete)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [454.50030517578125, 532.1378784179688, 460.5032958984375, 540.4628295898438], "spans": [[5, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": 
false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80059814453125, 513.1181640625, 264.57958984375, 521.443115234375], "spans": [[6, 0]], "text": "Change Object Owner ( CHGOBJOWN ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [454.50030517578125, 513.1181640625, 460.5032958984375, 521.443115234375], "spans": [[6, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80059814453125, 494.1578369140625, 299.39697265625, 502.48284912109375], "spans": [[7, 0]], "text": "Change Object Primary Group ( CHGOBJPGP ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [454.50030517578125, 494.1578369140625, 460.5032958984375, 502.48284912109375], "spans": [[7, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, 
"col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80059814453125, 475.13812255859375, 266.843994140625, 483.463134765625], "spans": [[8, 0]], "text": "Grant Object Authority ( GRTOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [454.5002746582031, 475.13812255859375, 460.5032653808594, 483.463134765625], "spans": [[8, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80056762695312, 456.118408203125, 271.78857421875, 464.44342041015625], "spans": [[9, 0]], "text": "Revoke Object Authority ( RVKOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], 
"row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [454.500244140625, 456.118408203125, 460.50323486328125, 464.44342041015625], "spans": [[9, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [70.800537109375, 437.1581115722656, 257.3543395996094, 445.4831237792969], "spans": [[10, 0]], "text": "Edit Object Authority ( EDTOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [454.500244140625, 437.1581115722656, 460.50323486328125, 445.4831237792969], "spans": [[10, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, 
{"bbox": null, "spans": [[10, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [70.800537109375, 418.1383972167969, 271.1882629394531, 426.4634094238281], "spans": [[11, 0]], "text": "Display Object Authority ( DSPOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [454.500244140625, 418.1383972167969, 460.50323486328125, 426.4634094238281], "spans": [[11, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [70.800537109375, 399.1186828613281, 237.0242462158203, 407.4436950683594], "spans": [[12, 0]], "text": "Work with Objects ( WRKOBJ ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [454.500244140625, 399.1186828613281, 460.50323486328125, 407.4436950683594], "spans": [[12, 2]], "text": "X", "type": "body", "col": 2, "col-header": 
false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [70.800537109375, 380.15838623046875, 238.51824951171875, 388.4833984375], "spans": [[13, 0]], "text": "Work with Libraries ( WRKLIB ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [454.5011291503906, 380.15838623046875, 460.5041198730469, 388.4833984375], "spans": [[13, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [70.80142211914062, 361.138671875, 284.7251281738281, 369.46368408203125], "spans": [[14, 0]], "text": "Add Authorization List Entry ( ADDAUTLE ) CL command", "type": "row_header", "col": 0, 
"col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [454.5010986328125, 361.138671875, 460.50408935546875, 369.46368408203125], "spans": [[14, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [70.8013916015625, 342.11895751953125, 297.70037841796875, 350.4439697265625], "spans": [[15, 0]], "text": "Change Authorization List Entry ( CHGAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [454.5010986328125, 342.11895751953125, 460.50408935546875, 350.4439697265625], "spans": [[15, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 4]], "text": "", "type": "body", "col": 4, "col-header": false, 
"col-span": [4, 5], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [70.8013916015625, 323.1586608886719, 299.32037353515625, 331.4836730957031], "spans": [[16, 0]], "text": "Remove Authorization List Entry ( RMVAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [454.5010986328125, 323.1586608886719, 460.50408935546875, 331.4836730957031], "spans": [[16, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [70.8013916015625, 304.1389465332031, 299.32037353515625, 312.4639587402344], "spans": [[17, 0]], "text": "Retrieve Authorization List Entry ( RTVAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [454.5010986328125, 304.1389465332031, 
460.50408935546875, 312.4639587402344], "spans": [[17, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": [70.8013916015625, 285.1192321777344, 269.78509521484375, 293.4442443847656], "spans": [[18, 0]], "text": "Display Authorization List ( DSPAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [454.5010986328125, 285.1192321777344, 460.50408935546875, 293.4442443847656], "spans": [[18, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [70.8013916015625, 266.158935546875, 313.63848876953125, 
274.48394775390625], "spans": [[19, 0]], "text": "Display Authorization List Objects ( DSPAUTLOBJ ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [454.5010986328125, 266.158935546875, 460.50408935546875, 274.48394775390625], "spans": [[19, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [70.8013916015625, 247.1392364501953, 253.48878479003906, 255.46424865722656], "spans": [[20, 0]], "text": "Edit Authorization List ( EDTAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [454.5010681152344, 247.1392364501953, 460.5040588378906, 255.46424865722656], "spans": [[20, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 20, 
"row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": [70.80136108398438, 228.11953735351562, 281.80908203125, 236.44454956054688], "spans": [[21, 0]], "text": "Work with Authorization Lists ( WRKAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [454.5010681152344, 228.11953735351562, 460.5040588378906, 236.44454956054688], "spans": [[21, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 21, "row-header": false, "row-span": [21, 22]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [135.69085693359375, 588.1738891601562, 542.3914184570312, 687.73828125], "page": 35, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3-1 Special registers and their corresponding values", "type": "table", "#-cols": 2, "#-rows": 4, "data": [[{"bbox": [142.8000030517578, 673.1370239257812, 209.67091369628906, 681.4619750976562], 
"spans": [[0, 0]], "text": "Special register", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [230.18911743164062, 673.1370239257812, 319.9352722167969, 681.4619750976562], "spans": [[0, 1]], "text": "Corresponding value", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [142.80001831054688, 643.1364135742188, 212.7012176513672, 662.5016479492188], "spans": [[1, 0]], "text": "USER or SESSION_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [230.2197265625, 654.1766967773438, 467.9906921386719, 662.5016479492188], "spans": [[1, 1]], "text": "The effective user of the thread excluding adopted authority.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [142.80003356933594, 624.11669921875, 216.63963317871094, 632.441650390625], "spans": [[2, 0]], "text": "CURRENT_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [230.19813537597656, 613.13671875, 535.6508178710938, 632.441650390625], "spans": [[2, 1]], "text": "The effective user of the thread including adopted authority. 
When no adopted authority is present, this has the same value as USER.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [142.8009033203125, 594.1170043945312, 209.73570251464844, 602.4419555664062], "spans": [[3, 0]], "text": "SYSTEM_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [230.24490356445312, 594.1170043945312, 425.64569091796875, 602.4419555664062], "spans": [[3, 1]], "text": "The authorization ID that initiated the connection.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [63.562129974365234, 496.5521545410156, 548.4708862304688, 687.6392822265625], "page": 36, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3-2 Built-in global variables", "type": "table", "#-cols": 3, "#-rows": 10, "data": [[{"bbox": [70.80000305175781, 673.1370239257812, 134.99070739746094, 681.4619750976562], "spans": [[0, 0]], "text": "Global variable", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [202.889404296875, 673.1370239257812, 223.34640502929688, 681.4619750976562], "spans": [[0, 1]], "text": "Type", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [281.8247985839844, 673.1370239257812, 331.3428039550781, 681.4619750976562], "spans": [[0, 2]], "text": "Description", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [70.80000305175781, 654.1766967773438, 132.7209014892578, 662.5016479492188], "spans": [[1, 0]], "text": "CLIENT_HOST", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [202.89028930664062, 654.1766967773438, 267.0765075683594, 662.5016479492188], "spans": [[1, 1]], "text": "VARCHAR(255)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [281.8473205566406, 654.1766967773438, 510.17547607421875, 662.5016479492188], "spans": [[1, 2]], "text": "Host name of the current client as returned by the system", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80001831054688, 635.156982421875, 140.66522216796875, 643.48193359375], "spans": [[2, 0]], "text": "CLIENT_IPADDR", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [202.872314453125, 635.156982421875, 267.077392578125, 643.48193359375], "spans": [[2, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [281.8454895019531, 635.156982421875, 509.6058349609375, 643.48193359375], "spans": [[2, 2]], "text": "IP address of the current client as returned by the system", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [70.80001831054688, 616.1372680664062, 134.98263549804688, 624.4622192382812], "spans": [[3, 0]], "text": "CLIENT_PORT", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [202.90293884277344, 616.1372680664062, 242.80084228515625, 624.4622192382812], "spans": [[3, 1]], "text": "INTEGER", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [281.7978515625, 616.1372680664062, 527.5922241210938, 624.4622192382812], "spans": [[3, 2]], "text": "Port used by the 
current client to communicate with the server", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.80001831054688, 597.1175537109375, 143.50924682617188, 605.4425048828125], "spans": [[4, 0]], "text": "PACKAGE_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [202.80575561523438, 597.1175537109375, 267.0693664550781, 605.4425048828125], "spans": [[4, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [281.85186767578125, 597.1175537109375, 436.5726013183594, 605.4425048828125], "spans": [[4, 2]], "text": "Name of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.80001831054688, 578.1572265625, 156.01654052734375, 586.482177734375], "spans": [[5, 0]], "text": "PACKAGE_SCHEMA", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [202.83544921875, 578.1572265625, 267.0864562988281, 586.482177734375], "spans": [[5, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [281.8707580566406, 578.1572265625, 470.44677734375, 586.482177734375], "spans": [[5, 2]], "text": "Schema name of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80001831054688, 559.1375122070312, 157.89932250976562, 567.4624633789062], "spans": [[6, 0]], "text": "PACKAGE_VERSION", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [202.72471618652344, 
559.1375122070312, 261.9825439453125, 567.4624633789062], "spans": [[6, 1]], "text": "VARCHAR(64)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [281.7492370605469, 559.1375122070312, 478.84381103515625, 567.4624633789062], "spans": [[6, 2]], "text": "Version identifier of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80001831054688, 540.1177978515625, 154.419921875, 548.4427490234375], "spans": [[7, 0]], "text": "ROUTINE_SCHEMA", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [202.79312133789062, 540.1177978515625, 267.0927429199219, 548.4427490234375], "spans": [[7, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [281.87164306640625, 540.1177978515625, 464.2602233886719, 548.4427490234375], "spans": [[7, 2]], "text": "Schema name of the currently running routine", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80001831054688, 521.157470703125, 188.43991088867188, 529.482421875], "spans": [[8, 0]], "text": "ROUTINE_SPECIFIC_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [202.8444061279297, 521.157470703125, 267.03692626953125, 529.482421875], "spans": [[8, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [281.80682373046875, 521.157470703125, 430.40045166015625, 529.482421875], "spans": [[8, 2]], "text": "Name of the currently running routine", "type": "body", "col": 2, "col-header": false, 
"col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80003356933594, 502.1377258300781, 139.4313507080078, 510.4627380371094], "spans": [[9, 0]], "text": "ROUTINE_TYPE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [202.74635314941406, 502.1377258300781, 239.2899627685547, 510.4627380371094], "spans": [[9, 1]], "text": "CHAR(1)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [281.7906494140625, 502.1377258300781, 425.09130859375, 510.4627380371094], "spans": [[9, 2]], "text": "Type of the currently running routine", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [135.90414428710938, 86.54425811767578, 548.0491333007812, 132.3650665283203], "page": 37, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 3-1 Subquery that is used as part of the rule", "type": "table", "#-cols": 2, "#-rows": 2, "data": [[{"bbox": [136.8000030517578, 116.547119140625, 316.67755126953125, 125.3218765258789], "spans": [[0, 0]], "text": "CURRENT_DATE IN (SELECT D.DATE_KEY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [222.19522094726562, 92.54750061035156, 371.6368408203125, 113.32206726074219], "spans": [[0, 1]], "text": "DATE_MASTER D D.BUSINESS_DAY = 'Y')", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [172.38134765625, 92.54750061035156, 209.87899780273438, 113.32206726074219], "spans": [[1, 0]], "text": "FROM WHERE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 1]], "text": "", "type": 
"body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [136.10394287109375, 62.45356750488281, 416.6361083984375, 98.12852478027344], "page": 42, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 3-8 Creation of a mask on the DATE_OF_BIRTH column", "type": "table", "#-cols": 2, "#-rows": 3, "data": [[{"bbox": [136.8000030517578, 87.80712127685547, 193.80364990234375, 96.58187866210938], "spans": [[0, 0]], "text": "CREATE MASK", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [204.16795349121094, 87.80712127685547, 416.6361083984375, 96.58187866210938], "spans": [[0, 1]], "text": "HR_SCHEMA.MASK_DATE_OF_BIRTH_ON_EMPLOYEES", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.8000030517578, 75.80731201171875, 148.79383850097656, 84.58206939697266], "spans": [[1, 0]], "text": "ON", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [178.77841186523438, 75.80731201171875, 376.6766052246094, 84.58206939697266], "spans": [[1, 1]], "text": "HR_SCHEMA.EMPLOYEES AS EMPLOYEES", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.8000030517578, 63.80750274658203, 192.76722717285156, 72.58226013183594], "spans": [[2, 0]], "text": "FOR COLUMN", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [203.96066284179688, 63.80750274658203, 276.7180480957031, 72.58226013183594], "spans": [[2, 1]], "text": "DATE_OF_BIRTH", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}]], "model": null}, 
{"bounding-box": null, "prov": [{"bbox": [136.4228515625, 386.3492126464844, 529.3878173828125, 671.8422241210938], "page": 56, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 4-2 Rules for row and column access", "type": "table", "#-cols": 7, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [224.1096954345703, 653.607177734375, 293.8129577636719, 665.3426513671875], "spans": [[0, 1], [0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [224.1096954345703, 653.607177734375, 293.8129577636719, 665.3426513671875], "spans": [[0, 1], [0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [334.76220703125, 653.607177734375, 396.792724609375, 665.3426513671875], "spans": [[0, 3], [0, 4]], "text": "ACCOUNTS", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [334.76220703125, 653.607177734375, 396.792724609375, 665.3426513671875], "spans": [[0, 3], [0, 4]], "text": "ACCOUNTS", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [426.6241455078125, 653.607177734375, 513.3307495117188, 665.3426513671875], "spans": [[0, 5], [0, 6]], "text": "TRANSACTIONS", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [426.6241455078125, 653.607177734375, 513.3307495117188, 665.3426513671875], "spans": [[0, 5], [0, 6]], "text": "TRANSACTIONS", "type": "col_header", "col": 6, "col-header": false, "col-span": [5, 7], "row": 0, "row-header": false, "row-span": [0, 1]}], 
[{"bbox": [150.43690490722656, 597.8072509765625, 194.79078674316406, 607.5770874023438], "spans": [[1, 0]], "text": "SECURITY", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [213.2664031982422, 598.4434814453125, 250.56985473632812, 607.2554321289062], "spans": [[1, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [277.6850280761719, 598.4434814453125, 291.3363037109375, 607.2554321289062], "spans": [[1, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [318.52044677734375, 598.4434814453125, 355.8239440917969, 607.2554321289062], "spans": [[1, 3]], "text": "No Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [382.9391174316406, 598.4434814453125, 396.59039306640625, 607.2554321289062], "spans": [[1, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [423.7405700683594, 598.4434814453125, 461.0440673828125, 607.2554321289062], "spans": [[1, 5]], "text": "No Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [488.9488220214844, 598.4434814453125, 501.11669921875, 607.2554321289062], "spans": [[1, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [175.966796875, 559.4096069335938, 194.7865753173828, 569.179443359375], "spans": [[2, 0]], "text": "DBE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [213.78109741210938, 560.1143798828125, 
250.2053985595703, 568.9263305664062], "spans": [[2, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [277.6911926269531, 560.1143798828125, 291.5215759277344, 568.9263305664062], "spans": [[2, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [319.04132080078125, 560.1143798828125, 355.4615173339844, 568.9263305664062], "spans": [[2, 3]], "text": "All Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [382.9472961425781, 560.1143798828125, 396.7776794433594, 568.9263305664062], "spans": [[2, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [424.2634582519531, 560.1143798828125, 460.68365478515625, 568.9263305664062], "spans": [[2, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [488.9579772949219, 560.1143798828125, 501.30291748046875, 568.9263305664062], "spans": [[2, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.10870361328125, 520.9776000976562, 194.82318115234375, 530.7474365234375], "spans": [[3, 0]], "text": "ADMIN", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [213.78109741210938, 521.785400390625, 250.2053985595703, 530.5973510742188], "spans": [[3, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [278.44573974609375, 521.785400390625, 290.7906494140625, 530.5973510742188], 
"spans": [[3, 2]], "text": "No", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [319.04437255859375, 521.785400390625, 355.46868896484375, 530.5973510742188], "spans": [[3, 3]], "text": "All Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [383.70904541015625, 521.785400390625, 396.053955078125, 530.5973510742188], "spans": [[3, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [424.2737121582031, 521.785400390625, 460.6980285644531, 530.5973510742188], "spans": [[3, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [488.97235107421875, 521.785400390625, 501.3172607421875, 530.5973510742188], "spans": [[3, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [162.24099731445312, 482.5798645019531, 194.78195190429688, 492.3497009277344], "spans": [[4, 0]], "text": "TELLER", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [213.78109741210938, 483.4906005859375, 250.2053985595703, 492.3025207519531], "spans": [[4, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [277.6911926269531, 483.4906005859375, 291.5215759277344, 492.3025207519531], "spans": [[4, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [319.04132080078125, 483.4906005859375, 355.4615173339844, 492.3025207519531], "spans": [[4, 3]], "text": "All Rows", "type": 
"body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [383.7018737792969, 483.4906005859375, 396.04681396484375, 492.3025207519531], "spans": [[4, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [424.2665710449219, 483.4906005859375, 460.6908874511719, 492.3025207519531], "spans": [[4, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [488.9652099609375, 483.4906005859375, 501.31011962890625, 492.3025207519531], "spans": [[4, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [141.78970336914062, 444.18218994140625, 194.802734375, 453.9520263671875], "spans": [[5, 0]], "text": "CUSTOMER", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [220.57534790039062, 438.9849548339844, 244.4169464111328, 460.1500244140625], "spans": [[5, 1]], "text": "Own Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [278.4293212890625, 445.1615295410156, 290.7783508300781, 453.97344970703125], "spans": [[5, 2]], "text": "No", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [325.81707763671875, 438.9849548339844, 349.65869140625, 460.1500244140625], "spans": [[5, 3]], "text": "Own Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [383.6710510253906, 445.1615295410156, 396.02008056640625, 453.97344970703125], "spans": [[5, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": 
[4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [431.02484130859375, 438.9849548339844, 454.866455078125, 460.1500244140625], "spans": [[5, 5]], "text": "Own Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [488.91278076171875, 445.1615295410156, 501.2618103027344, 453.97344970703125], "spans": [[5, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.55479431152344, 405.78448486328125, 194.79734802246094, 415.5543212890625], "spans": [[6, 0]], "text": "PUBLIC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [213.2664031982422, 406.8324890136719, 250.56985473632812, 415.6444091796875], "spans": [[6, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [277.6850280761719, 406.8324890136719, 291.3363037109375, 415.6444091796875], "spans": [[6, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [318.52044677734375, 406.8324890136719, 355.8239440917969, 415.6444091796875], "spans": [[6, 3]], "text": "No Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [382.9391174316406, 406.8324890136719, 396.59039306640625, 415.6444091796875], "spans": [[6, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [423.7405700683594, 406.8324890136719, 461.0440673828125, 415.6444091796875], "spans": [[6, 5]], "text": "No Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, 
"row-span": [6, 7]}, {"bbox": [488.9488220214844, 406.8324890136719, 501.11669921875, 415.6444091796875], "spans": [[6, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [136.258056640625, 393.7317199707031, 529.4730224609375, 684.1337280273438], "page": 57, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 4-3 Column masks", "type": "table", "#-cols": 4, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [287.8096923828125, 668.81640625, 355.4244689941406, 680.1975708007812], "spans": [[0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [428.24420166015625, 668.81640625, 488.26617431640625, 680.1975708007812], "spans": [[0, 3]], "text": "ACCOUNTS", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [150.03750610351562, 608.87744140625, 193.05224609375, 618.352294921875], "spans": [[1, 0]], "text": "SECURITY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [212.63400268554688, 607.9304809570312, 248.9210205078125, 616.476318359375], "spans": [[1, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [263.1838073730469, 589.69384765625, 382.3654479980469, 635.3184814453125], "spans": [[1, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER 
CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.81256103515625, 609.6608276367188, 482.86053466796875, 615.3514404296875], "spans": [[1, 3]], "text": "ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [174.79660034179688, 555.6320190429688, 193.04815673828125, 565.1068725585938], "spans": [[2, 0]], "text": "DBE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [213.1331024169922, 556.0161743164062, 248.37786865234375, 564.56201171875], "spans": [[2, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [263.1838073730469, 537.7796020507812, 382.3654479980469, 583.404296875], "spans": [[2, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [427.8116149902344, 557.7466430664062, 482.92327880859375, 563.437255859375], "spans": [[2, 3]], "text": "ACCOUNT NUMBER ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [160.38710021972656, 521.521728515625, 193.08364868164062, 530.99658203125], "spans": [[3, 0]], "text": "ADMIN", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [213.1331024169922, 520.4083862304688, 248.45372009277344, 528.9542236328125], "spans": [[3, 1]], "text": "All Rows", "type": "body", 
"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [315.5306091308594, 522.1387329101562, 330.12255859375, 527.829345703125], "spans": [[3, 2]], "text": "None", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [448.0835266113281, 522.1387329101562, 462.67547607421875, 527.829345703125], "spans": [[3, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [161.48519897460938, 489.0753479003906, 193.04367065429688, 498.5502014160156], "spans": [[4, 0]], "text": "TELLER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [213.1331024169922, 488.7939453125, 248.45372009277344, 497.3398132324219], "spans": [[4, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [263.1838073730469, 474.550537109375, 382.3654479980469, 512.1885375976562], "spans": [[4, 2]], "text": "CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER TAX ID _ _", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [448.07916259765625, 490.52423095703125, 462.6711120605469, 496.2148742675781], "spans": [[4, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [141.65139770507812, 457.7271423339844, 193.06382751464844, 467.2019958496094], "spans": [[5, 0]], "text": "CUSTOMER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [208.84030151367188, 457.179443359375, 252.7267608642578, 465.7253112792969], 
"spans": [[5, 1]], "text": "Own Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [315.5306091308594, 458.9099426269531, 330.12255859375, 464.6005859375], "spans": [[5, 2]], "text": "None", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [448.0835266113281, 458.9099426269531, 462.67547607421875, 464.6005859375], "spans": [[5, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [160.8197021484375, 422.5186462402344, 193.05859375, 431.9934997558594], "spans": [[6, 0]], "text": "PUBLIC", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [212.63400268554688, 421.5716247558594, 248.9210205078125, 430.11749267578125], "spans": [[6, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [263.18353271484375, 403.33502197265625, 382.3654479980469, 448.9596862792969], "spans": [[6, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER LOGIN ID CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [427.81256103515625, 423.3021240234375, 482.86053466796875, 428.9927673339844], "spans": [[6, 3]], "text": "ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [142.8543243408203, 328.035400390625, 299.9855041503906, 479.80316162109375], "page": 102, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 
2, "#-rows": 11, "data": [[{"bbox": [149.45120239257812, 467.8338623046875, 233.62379455566406, 474.97100830078125], "spans": [[0, 0]], "text": "CREDIT CARD NUMBER _ _", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [257.74920654296875, 467.8338623046875, 279.168212890625, 474.97100830078125], "spans": [[0, 1]], "text": "TOTAL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [148.50592041015625, 454.30743408203125, 221.83938598632812, 461.4362487792969], "spans": [[1, 0]], "text": "3785 0000 0000 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [272.42230224609375, 454.30743408203125, 295.6497497558594, 461.4362487792969], "spans": [[1, 1]], "text": "233.50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [148.50592041015625, 440.8002014160156, 221.83853149414062, 447.92901611328125], "spans": [[2, 0]], "text": "3785 1111 1111 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [272.42144775390625, 440.8002014160156, 295.6488952636719, 447.92901611328125], "spans": [[2, 1]], "text": "105.10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [148.50619506835938, 427.264892578125, 221.84132385253906, 434.3937072753906], "spans": [[3, 0]], "text": "3785 2222 2222 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [272.4057922363281, 427.264892578125, 295.6465759277344, 434.3937072753906], "spans": [[3, 1]], "text": "300 00 300.00", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [148.50619506835938, 413.7298889160156, 221.85214233398438, 420.85870361328125], "spans": [[4, 0]], "text": "3785 3333 3333 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [266.1250305175781, 413.7298889160156, 295.6675109863281, 420.85870361328125], "spans": [[4, 1]], "text": "1,775.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [148.50619506835938, 400.22265625, 221.83880615234375, 407.3514709472656], "spans": [[5, 0]], "text": "5466 4444 4444 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [272.4217529296875, 400.22265625, 295.6492004394531, 407.3514709472656], "spans": [[5, 1]], "text": "601.70", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [148.50619506835938, 386.6877136230469, 221.83880615234375, 393.8165283203125], "spans": [[6, 0]], "text": "5466 5555 5555 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [276.646484375, 386.6877136230469, 295.6483154296875, 393.8165283203125], "spans": [[6, 1]], "text": "37.80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [148.50619506835938, 373.1529541015625, 221.83880615234375, 380.2817687988281], "spans": [[7, 0]], "text": "5466 6666 6666 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [272.4217529296875, 373.1529541015625, 295.6492004394531, 380.2817687988281], "spans": [[7, 1]], "text": "490.45", "type": "body", "col": 1, "col-header": 
false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [148.50619506835938, 359.6453857421875, 221.84132385253906, 366.7742004394531], "spans": [[8, 0]], "text": "6011 7777 7777 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [268.1813049316406, 359.6453857421875, 295.6460266113281, 366.7742004394531], "spans": [[8, 1]], "text": "1005.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [148.50619506835938, 346.11041259765625, 221.83880615234375, 353.2392272949219], "spans": [[9, 0]], "text": "6011 8888 8888 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [272.4217529296875, 346.11041259765625, 295.6492004394531, 353.2392272949219], "spans": [[9, 1]], "text": "750.33", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [148.50619506835938, 332.5756530761719, 221.83880615234375, 339.7044677734375], "spans": [[10, 0]], "text": "6011 9999 9999 0001", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [276.646484375, 332.5756530761719, 295.6483154296875, 339.7044677734375], "spans": [[10, 1]], "text": "10.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null}, {"bounding-box": null, "prov": [{"bbox": [313.2283020019531, 328.81744384765625, 469.1299743652344, 479.4169006347656], "page": 102, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 6-1 Timing of column masking", "type": "table", "#-cols": 2, "#-rows": 11, "data": [[{"bbox": [318.9862060546875, 467.8338623046875, 403.1588134765625, 474.97100830078125], "spans": [[0, 0]], 
"text": "CREDIT CARD NUMBER _ _", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [427.28424072265625, 467.8338623046875, 448.7032470703125, 474.97100830078125], "spans": [[0, 1]], "text": "TOTAL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [318.041015625, 454.30743408203125, 390.6849365234375, 461.4362487792969], "spans": [[1, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [441.9682312011719, 454.30743408203125, 465.16064453125, 461.4362487792969], "spans": [[1, 1]], "text": "233.50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [318.041015625, 440.8002014160156, 390.6849365234375, 447.92901611328125], "spans": [[2, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [441.9682312011719, 440.8002014160156, 465.16064453125, 447.92901611328125], "spans": [[2, 1]], "text": "105.10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [318.0412902832031, 427.264892578125, 390.6543273925781, 434.3937072753906], "spans": [[3, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [441.9408874511719, 427.264892578125, 465.1816711425781, 434.3937072753906], "spans": [[3, 1]], "text": "300 00 300.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [318.0412902832031, 413.7298889160156, 390.69854736328125, 420.85870361328125], 
"spans": [[4, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [435.6726379394531, 413.7298889160156, 465.1684265136719, 420.85870361328125], "spans": [[4, 1]], "text": "1,775.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [318.0412902832031, 400.22265625, 390.6852111816406, 407.3514709472656], "spans": [[5, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [441.968505859375, 400.22265625, 465.1609191894531, 407.3514709472656], "spans": [[5, 1]], "text": "601.70", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [318.0412902832031, 386.6877136230469, 390.6852111816406, 393.8165283203125], "spans": [[6, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [446.19329833984375, 386.6877136230469, 465.16595458984375, 393.8165283203125], "spans": [[6, 1]], "text": "37.80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [318.0412902832031, 373.1529541015625, 390.6852111816406, 380.2817687988281], "spans": [[7, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [441.968505859375, 373.1529541015625, 465.1609191894531, 380.2817687988281], "spans": [[7, 1]], "text": "490.45", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [318.0412902832031, 359.6453857421875, 390.6678771972656, 366.7745361328125], 
"spans": [[8, 0]], "text": "**** **** **** 1234 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [437.7164001464844, 359.6453857421875, 465.1811218261719, 366.7742004394531], "spans": [[8, 1]], "text": "1005.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [318.0412902832031, 346.11041259765625, 390.6852111816406, 353.2392272949219], "spans": [[9, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [441.968505859375, 346.11041259765625, 465.1609191894531, 353.2392272949219], "spans": [[9, 1]], "text": "750.33", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [318.0412902832031, 332.5756530761719, 390.6852111816406, 339.7044677734375], "spans": [[10, 0]], "text": "**** **** **** 0001", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [446.19329833984375, 332.5756530761719, 465.16595458984375, 339.7044677734375], "spans": [[10, 1]], "text": "10.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 
792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}, {"height": 792.0, "page": 17, "width": 612.0}, {"height": 792.0, "page": 18, "width": 612.0}, {"height": 792.0, "page": 19, "width": 612.0}, {"height": 792.0, "page": 20, "width": 612.0}, {"height": 792.0, "page": 21, "width": 612.0}, {"height": 792.0, "page": 22, "width": 612.0}, {"height": 792.0, "page": 23, "width": 612.0}, {"height": 792.0, "page": 24, "width": 612.0}, {"height": 792.0, "page": 25, "width": 612.0}, {"height": 792.0, "page": 26, "width": 612.0}, {"height": 792.0, "page": 27, "width": 612.0}, {"height": 792.0, "page": 28, "width": 612.0}, {"height": 792.0, "page": 29, "width": 612.0}, {"height": 792.0, "page": 30, "width": 612.0}, {"height": 792.0, "page": 31, "width": 612.0}, {"height": 792.0, "page": 32, "width": 612.0}, {"height": 792.0, "page": 33, "width": 612.0}, {"height": 792.0, "page": 34, "width": 612.0}, {"height": 792.0, "page": 35, "width": 612.0}, {"height": 792.0, "page": 36, "width": 612.0}, {"height": 792.0, "page": 37, "width": 612.0}, {"height": 792.0, "page": 38, "width": 612.0}, {"height": 792.0, "page": 39, "width": 612.0}, {"height": 792.0, "page": 40, "width": 612.0}, {"height": 792.0, "page": 41, "width": 612.0}, {"height": 792.0, "page": 42, "width": 612.0}, {"height": 792.0, "page": 43, "width": 612.0}, {"height": 792.0, "page": 44, "width": 612.0}, {"height": 792.0, "page": 45, "width": 612.0}, {"height": 792.0, "page": 46, "width": 612.0}, {"height": 792.0, "page": 47, "width": 612.0}, {"height": 792.0, "page": 48, "width": 612.0}, {"height": 792.0, "page": 49, "width": 612.0}, {"height": 792.0, "page": 50, "width": 612.0}, {"height": 792.0, "page": 51, "width": 612.0}, {"height": 792.0, "page": 52, "width": 612.0}, {"height": 792.0, "page": 53, 
"width": 612.0}, {"height": 792.0, "page": 54, "width": 612.0}, {"height": 792.0, "page": 55, "width": 612.0}, {"height": 792.0, "page": 56, "width": 612.0}, {"height": 792.0, "page": 57, "width": 612.0}, {"height": 792.0, "page": 58, "width": 612.0}, {"height": 792.0, "page": 59, "width": 612.0}, {"height": 792.0, "page": 60, "width": 612.0}, {"height": 792.0, "page": 61, "width": 612.0}, {"height": 792.0, "page": 62, "width": 612.0}, {"height": 792.0, "page": 63, "width": 612.0}, {"height": 792.0, "page": 64, "width": 612.0}, {"height": 792.0, "page": 65, "width": 612.0}, {"height": 792.0, "page": 66, "width": 612.0}, {"height": 792.0, "page": 67, "width": 612.0}, {"height": 792.0, "page": 68, "width": 612.0}, {"height": 792.0, "page": 69, "width": 612.0}, {"height": 792.0, "page": 70, "width": 612.0}, {"height": 792.0, "page": 71, "width": 612.0}, {"height": 792.0, "page": 72, "width": 612.0}, {"height": 792.0, "page": 73, "width": 612.0}, {"height": 792.0, "page": 74, "width": 612.0}, {"height": 792.0, "page": 75, "width": 612.0}, {"height": 792.0, "page": 76, "width": 612.0}, {"height": 792.0, "page": 77, "width": 612.0}, {"height": 792.0, "page": 78, "width": 612.0}, {"height": 792.0, "page": 79, "width": 612.0}, {"height": 792.0, "page": 80, "width": 612.0}, {"height": 792.0, "page": 81, "width": 612.0}, {"height": 792.0, "page": 82, "width": 612.0}, {"height": 792.0, "page": 83, "width": 612.0}, {"height": 792.0, "page": 84, "width": 612.0}, {"height": 792.0, "page": 85, "width": 612.0}, {"height": 792.0, "page": 86, "width": 612.0}, {"height": 792.0, "page": 87, "width": 612.0}, {"height": 792.0, "page": 88, "width": 612.0}, {"height": 792.0, "page": 89, "width": 612.0}, {"height": 792.0, "page": 90, "width": 612.0}, {"height": 792.0, "page": 91, "width": 612.0}, {"height": 792.0, "page": 92, "width": 612.0}, {"height": 792.0, "page": 93, "width": 612.0}, {"height": 792.0, "page": 94, "width": 612.0}, {"height": 792.0, "page": 95, "width": 612.0}, 
{"height": 792.0, "page": 96, "width": 612.0}, {"height": 792.0, "page": 97, "width": 612.0}, {"height": 792.0, "page": 98, "width": 612.0}, {"height": 792.0, "page": 99, "width": 612.0}, {"height": 792.0, "page": 100, "width": 612.0}, {"height": 792.0, "page": 101, "width": 612.0}, {"height": 792.0, "page": 102, "width": 612.0}, {"height": 792.0, "page": 103, "width": 612.0}, {"height": 792.0, "page": 104, "width": 612.0}, {"height": 792.0, "page": 105, "width": 612.0}, {"height": 792.0, "page": 106, "width": 612.0}, {"height": 792.0, "page": 107, "width": 612.0}, {"height": 792.0, "page": 108, "width": 612.0}, {"height": 792.0, "page": 109, "width": 612.0}, {"height": 792.0, "page": 110, "width": 612.0}, {"height": 792.0, "page": 111, "width": 612.0}, {"height": 792.0, "page": 112, "width": 612.0}, {"height": 792.0, "page": 113, "width": 612.0}, {"height": 792.0, "page": 114, "width": 612.0}, {"height": 792.0, "page": 115, "width": 612.0}, {"height": 792.0, "page": 116, "width": 612.0}, {"height": 792.0, "page": 117, "width": 612.0}, {"height": 792.0, "page": 118, "width": 612.0}, {"height": 792.0, "page": 119, "width": 612.0}, {"height": 792.0, "page": 120, "width": 612.0}, {"height": 792.0, "page": 121, "width": 612.0}, {"height": 792.0, "page": 122, "width": 612.0}, {"height": 792.0, "page": 123, "width": 612.0}, {"height": 792.0, "page": 124, "width": 612.0}, {"height": 792.0, "page": 125, "width": 612.0}, {"height": 792.0, "page": 126, "width": 612.0}, {"height": 792.0, "page": 127, "width": 612.0}, {"height": 792.0, "page": 128, "width": 612.0}, {"height": 792.0, "page": 129, "width": 612.0}, {"height": 792.0, "page": 130, "width": 612.0}, {"height": 792.0, "page": 131, "width": 612.0}, {"height": 792.0, "page": 132, "width": 612.0}, {"height": 792.0, "page": 133, "width": 612.0}, {"height": 792.0, "page": 134, "width": 612.0}, {"height": 792.0, "page": 135, "width": 612.0}, {"height": 792.0, "page": 136, "width": 612.0}, {"height": 792.0, "page": 137, 
"width": 612.0}, {"height": 792.0, "page": 138, "width": 612.0}, {"height": 792.0, "page": 139, "width": 612.0}, {"height": 792.0, "page": 140, "width": 612.0}, {"height": 792.0, "page": 141, "width": 612.0}, {"height": 792.0, "page": 142, "width": 612.0}, {"height": 792.0, "page": 143, "width": 612.0}, {"height": 792.0, "page": 144, "width": 612.0}, {"height": 792.0, "page": 145, "width": 612.0}, {"height": 792.0, "page": 146, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "redp5110.pdf", "filename-prov": null, "document-hash": "3f8b6f0cb6d21ff16bdd7254c47ba72984b7ed1b70114e833c30f19be5366ad6", "#-pages": 146, "collection-name": null, "description": null, "page-hashes": [{"hash": "042dcdd712c3671577114114227f75ce1b5fe22a78e589c60b27d3c414ca914e", "model": "default", "page": 1}, {"hash": "19c7033f317f569819298dcaf98d4fd119632b01b323f3e244b6c14cd46b27b0", "model": "default", "page": 2}, {"hash": "1650a40ffe39a2240d05bdf5a7297a9e7de9c2564373213b732eb2009de23fd5", "model": "default", "page": 3}, {"hash": "fd0e00135169f317b2e2ab993cc64383dca2511f4a9e954563050a69dbefc35f", "model": "default", "page": 4}, {"hash": "dd607eefa7f279633dce503515463003c0167d6e1480e41daf39d95a03b02156", "model": "default", "page": 5}, {"hash": "69724844504d443f2f7dabc9d6cc912e26f1aba1fc51ddb2f248aa6f8da70505", "model": "default", "page": 6}, {"hash": "3ca620d960ef23d3419b3de71eb985eaa9bd54b7c1463116d4d11f64ab6515a8", "model": 
"default", "page": 7}, {"hash": "f360d9c1a29f5d9cc38f7a149b5e82ae9c177dedf534141f5d96d41792ccca01", "model": "default", "page": 8}, {"hash": "aaee7dcc87c982f44b3311ea587d9fee5d510de9567f84832e8b2effbf5e4c49", "model": "default", "page": 9}, {"hash": "f54ad5009578acd50e29ddf9e764f3894aef129245709bdda6695aca35080ef1", "model": "default", "page": 10}, {"hash": "35f70e10a2408e0395dfa9e894c5173186ac4481f414e41666e0be54f194accd", "model": "default", "page": 11}, {"hash": "64e97a3d553d9443178aae195f16f327cf503bb9c6930fe13af66b9fed277578", "model": "default", "page": 12}, {"hash": "995809366f67a29d338e5d08064a21a5bcda880bb0fe9d31085a3361059cf9ca", "model": "default", "page": 13}, {"hash": "b33a9cb89864b8461e994bc178c0f348722a75445a176a0ff059a1f1c6013c38", "model": "default", "page": 14}, {"hash": "37b17e27e1e6d405ed9c79a1282703930b1e8e1bff6b849a19ce614e5f874577", "model": "default", "page": 15}, {"hash": "ed6d8cc30effd85fb3a8b189732a80dd1d56dbc7fa4f079cd6d16f6084f4545a", "model": "default", "page": 16}, {"hash": "a355435891596f80e1ea7f3feef6b93a4f82caf62044e09a86e9ce2e02236715", "model": "default", "page": 17}, {"hash": "1d071bfa86d2d97bc7251f5f837deb4b3b72f422b79f76a83457210d40125b2a", "model": "default", "page": 18}, {"hash": "a74e58c9cd8ff01b37e4fe7df505cf495b9c1892db449b93e9076bb71fbd2ef2", "model": "default", "page": 19}, {"hash": "e83cbcc9e475190599ffc079b9266548d97fe0de76a0cb33c9fd50ef25237242", "model": "default", "page": 20}, {"hash": "c52304c295fd7f20396f82ab2bad8f0a085f067afc5692772fb9391ea880bcde", "model": "default", "page": 21}, {"hash": "86497e2615bb82251139e933e8e64153814e4ba46a499195083de8da6f5b89f9", "model": "default", "page": 22}, {"hash": "925398aa64327096c129a383e4bbec2eb083163878227c2d4e3166b44207fc03", "model": "default", "page": 23}, {"hash": "9d4e3d06a5f05410069b2b9486ec876c0e749fc8287c5d2c89940f4c44af96b5", "model": "default", "page": 24}, {"hash": "3956d5e714edf8547117687948339cc61c0727eaea2e2ad3b81e87963c1b73f0", "model": "default", "page": 
25}, {"hash": "0bb0e09bd6e39cfc3da30376daecd1ad025ac38727078fd57ed04ab76e6dc8f3", "model": "default", "page": 26}, {"hash": "45005581d511136999fbc537f9465bb0b068b312ece0b9dcffe8f47a2af795fd", "model": "default", "page": 27}, {"hash": "4250019942cd107c8068cdf7c0c40c32f1735b6cd39e83eebd6b88f15f7af945", "model": "default", "page": 28}, {"hash": "d932d7afb19cda22b09acd96262695d080061df5f6f61323bbf3151b44707b0f", "model": "default", "page": 29}, {"hash": "bf6eb386ea506279669df237b54e8d789fa70b12d2830a42649632e5b057343f", "model": "default", "page": 30}, {"hash": "5dea54e30c89afe307a397ed24e083324991a1ddb17b94119f149183c1592cd7", "model": "default", "page": 31}, {"hash": "40fac6dd979f00f24fdcd1f07afad352b233f6926b8dfc8315e47c5304df1009", "model": "default", "page": 32}, {"hash": "40378b24c9b151d146ccd959a701dddfc8d9bac79a2075706c34d22dc185afd1", "model": "default", "page": 33}, {"hash": "935989acb8f1108365160d6428516b2b5cca95e12c75fb33818a33ad20730014", "model": "default", "page": 34}, {"hash": "570c8b11193a5b9e26d2b5a680c137cc6acbbb3c4c8dbfd02e96410f67444fab", "model": "default", "page": 35}, {"hash": "9f21fc6a00cee78376ee9fc31eb93ae5f0cde918f78b361f1ff0d2a1db7dfc01", "model": "default", "page": 36}, {"hash": "0e68f946bcdf7f573d88eed366216b5ba0ed470fcab1a783bcfb894802bf284e", "model": "default", "page": 37}, {"hash": "6ca7e5139b0a1993e0dd093698a9df1c1201091e509ec715d25c871c05a0863e", "model": "default", "page": 38}, {"hash": "c441267b99ad21ec04958ba35dcd465ce775b2c51c03ba67a4cfbb76f9955907", "model": "default", "page": 39}, {"hash": "aa16dbe8fa7fcd0634cf4930aa82a13c4f2d8621e759cec9c3097c15975551d2", "model": "default", "page": 40}, {"hash": "a1994f1ff203311afdc2424fedfad6f0429ccefb39ef62f7107ff75934404093", "model": "default", "page": 41}, {"hash": "92f8bad908b6a17adb727f822d8f77b673f79db90763faa32a648d89de97a0ae", "model": "default", "page": 42}, {"hash": "7cde568961d0f4ab1186b75a8d4f024a56b5065814f2050e7deda89fcb940064", "model": "default", "page": 43}, {"hash": 
"2d6e9fa06bae3a81449a646b629af6332dfc5780e5787e89a1eb491e60a8b95f", "model": "default", "page": 44}, {"hash": "c3c1468d8e9bbca1ac57cb97b7d6e191e3138cd98c919473a3deab89982d46fa", "model": "default", "page": 45}, {"hash": "3efc7b8e4918efef458011a9d564a062ba25e10f1b1998db385c746404995af2", "model": "default", "page": 46}, {"hash": "c96cd910329a52e1c256c61bafef7551e838ffe55cfc8de60ab8d1770a614d2a", "model": "default", "page": 47}, {"hash": "ed43a8e94b831c81406d263c7e72cb18279ff682bf82ca21d26bc8eaf58939b7", "model": "default", "page": 48}, {"hash": "beaba63670852ef3937e53edfd9c65e8381ccad289cf377ea1819ed4499649a5", "model": "default", "page": 49}, {"hash": "029387a73b937661bd354c45643d77243aae30a9e1dd692c26cadab54b33f630", "model": "default", "page": 50}, {"hash": "96cee9e611cde6da9b28630ae44aa4dddfb372bec1ad1400a4e5e0c641c18e9b", "model": "default", "page": 51}, {"hash": "d5f7a2c44833429eec81845b03adc589ed3fa9dbacfb90cbe3ac733cfb86306c", "model": "default", "page": 52}, {"hash": "0e398142d223dfaf46ad1d76702b89aa208b23fdc9f5fb7aaba1472a9db53b7b", "model": "default", "page": 53}, {"hash": "59664e9cadd6da670dd867311b1c5d9789cd944186e8ff42375b9719ddc43cf9", "model": "default", "page": 54}, {"hash": "5e4e6eaeafaf43a18590db6079f775401f7689d694cda14516fb000f7d85885c", "model": "default", "page": 55}, {"hash": "68496b0fe32a5149c0d6e70fef47ac02544a1db8176b6fa31c2c4bc59b35f933", "model": "default", "page": 56}, {"hash": "ac1bffe2a57f4b9f610dac9745f85bf8029c04e6279bae1fd942b030ca7e3635", "model": "default", "page": 57}, {"hash": "42616e9b91f856e761cf994d852d7c913e50b2fc00ce04e71cd28d51a4c88bf1", "model": "default", "page": 58}, {"hash": "4e9917d93adf25e36c0eeb37beb7881df8d8de40b23fdcde3f8c35e8867b4f7b", "model": "default", "page": 59}, {"hash": "7a484f738feda7e2327ce3bae87e5989b008d1309008f5fc237a681be7b4780c", "model": "default", "page": 60}, {"hash": "2957be6c48ca15c71ae2d63191e3ec999a65771e444c197828a2efe54aad7dee", "model": "default", "page": 61}, {"hash": 
"81d885ff0652b16f490f2bdf49bf5b2f85bdea4ea7dc85f98de238b437812522", "model": "default", "page": 62}, {"hash": "c0a9752603b861a7c13d678d1c89174f140ae5ef1fc4af32a872ae99bd09b494", "model": "default", "page": 63}, {"hash": "9fa129577bad65520977b6742108edd287a8413c1f002a0fcde9e8d4649e5ca3", "model": "default", "page": 64}, {"hash": "720722b50e586615b5a55451ec49b89048aecbb7450b7bf952ab7b8cab856b63", "model": "default", "page": 65}, {"hash": "91c76d552d29f2d09c34608319dd7729bd1309ccfadd56f22a00d25e8bbce771", "model": "default", "page": 66}, {"hash": "d9a6a973665fd160fb9cf52d6444cd4be6bf5a977666b625f58858ba507b0ee2", "model": "default", "page": 67}, {"hash": "dcc11d3809231dfdbe15f28126c3c6c7016f0d239c48829860133e645f0b4e9e", "model": "default", "page": 68}, {"hash": "18f5746455a39ff66f0d83bf5dcc45151e5313ccf038da38b25195a135445d23", "model": "default", "page": 69}, {"hash": "6f150521a19ebcc1dc711a861d26a1447ee33c01d770b6e985ed23ac4c3bce0b", "model": "default", "page": 70}, {"hash": "2675ed680861667ca9a8eb01fffa6b1ffc5c682d1217a7ee211ee1a14f066301", "model": "default", "page": 71}, {"hash": "cc1b3ad555bc13b0266cc1dd1646f6703b96043a17865254191fb28200897100", "model": "default", "page": 72}, {"hash": "d69dc0543126dbc6d00e1e8ce512bbf99efcda00f45cae9ab93877fc9e833308", "model": "default", "page": 73}, {"hash": "3afbdd3081b903b7941e16a1b3e0feebb23b70fa6a850e3b1119172763263fdb", "model": "default", "page": 74}, {"hash": "9ab6f9e4fd7c147650dbf4b3226a4805d3e3a86af0be0496be4cbd7eb2fe38dc", "model": "default", "page": 75}, {"hash": "3cd1d3fe8ed3a77aeaf1b68c9faa81fdc1209f44b20dd695826bfb009497af91", "model": "default", "page": 76}, {"hash": "3e0d46cb61ec6ec6ba1aa5f21e61d8988b7c531c3928c1cfa2ea5a35c5f7556f", "model": "default", "page": 77}, {"hash": "1d2d26c6366591fa7103e6920121f20b7d47e252f8e5598bc9b0d10d88b0a876", "model": "default", "page": 78}, {"hash": "6b74896cf6d9d79d6eea588138972973314a1e883e4a92eb39533e096e5fea4c", "model": "default", "page": 79}, {"hash": 
"2b53410a79b04ddd9d95ca46742e1916b631d56c91e67426449a2f48303233c9", "model": "default", "page": 80}, {"hash": "1cad2f44f63e2c43c0950ba8863f3a3d0f2f4afa1ae6f9ca2ceb992a34061d98", "model": "default", "page": 81}, {"hash": "1fd53dcb8bd415d94cbebe26f4938b10551f29603658e5d92b9932d2179878ba", "model": "default", "page": 82}, {"hash": "4ef9b11fb0f67f1227d7241f38a68b1e7d12cccb90802424b6fc139e84e73241", "model": "default", "page": 83}, {"hash": "1c2ea11640d6d0298f383f42acc541cee1d082453dc6c201fbd0dfe2c3583a6d", "model": "default", "page": 84}, {"hash": "fe89905acb289f8126f56f0fa57b0032cf459757a285a28e18a4fa79d0f37ff5", "model": "default", "page": 85}, {"hash": "897bc2fcbbd0147b2ad32d7130836346100dd1f483bb904be454bddee79032d3", "model": "default", "page": 86}, {"hash": "c8e638b82bad37d6d6528852ca8f58d16aa6de3ae113f9f59cc061591bbe36d4", "model": "default", "page": 87}, {"hash": "c8b4dcf9ac58518dfd7a0030612750ef310992ecfa1352cc501a3183eddc63ac", "model": "default", "page": 88}, {"hash": "311e4dab810a715c0dd964b03c57ef59105b844638789454a5a31285bb20b6c5", "model": "default", "page": 89}, {"hash": "bcc127d2a49aaeb213cddec0bef6623f19a01d5ea42b6f7495b4f803405c42f6", "model": "default", "page": 90}, {"hash": "bb0ab5360776e0488e57ac48e39d6e0df6200c2570723dcb807ad3f679c09534", "model": "default", "page": 91}, {"hash": "6fd7cdacf0d19eda989b99c3b1e02ef6d6643dbc6cfa6f10037bd0ebb7cd10b5", "model": "default", "page": 92}, {"hash": "d33f0c4ae60d66663fa25b1f7675c11437badaa8a8fa7e51daeebc6141df12ed", "model": "default", "page": 93}, {"hash": "315310a543a8ecc45c434d0e0b8aa54c6566d53d61acb74820a6649e583f9cb2", "model": "default", "page": 94}, {"hash": "38d412966dfe997ab9448d2df046448e5ebbedd2531b8527bd744c8bb5440508", "model": "default", "page": 95}, {"hash": "08d37d1668223a1a7194cf811cd594cfe30e422dd1695df02a8b73a7b735084b", "model": "default", "page": 96}, {"hash": "31d9ea5f81342dbfdc72492243a2e7f0aa9817d84d61eab0181aeaa71d75d7f5", "model": "default", "page": 97}, {"hash": 
"1cb53ff64bc87e1939f8b45a89a00a6267a02e718ec0c634cf7e20936ffdd4f2", "model": "default", "page": 98}, {"hash": "1245402b982e1a9d1065ac0c0cad30336aa14ecdc2cb3ef4a5c36bc55e9bbd10", "model": "default", "page": 99}, {"hash": "c38f21714819257f54186f075bf6b9446113e03dd6d40e5fd1319fd5cd3c359c", "model": "default", "page": 100}, {"hash": "9bb82caef77080aa11554e67ab1f214e5cf5e8fe2415663d128ba541cf314d5b", "model": "default", "page": 101}, {"hash": "714f390df026d13c65dea02894cf3d91496fd2ae3a94073d90f7714df79d47ee", "model": "default", "page": 102}, {"hash": "f19f8a6e418fdf2a42d8ede7c788f9f8cf33b907e3bb606e9c829320dff3bb5f", "model": "default", "page": 103}, {"hash": "2b15ecb09a734a16ed9804314a6cc9f03a12af63a904fac62a97ea21b1d2ecef", "model": "default", "page": 104}, {"hash": "8b15d46f01007cf63e5bad57b8cd889275c11e6b58bebe48ffec8842d67e7277", "model": "default", "page": 105}, {"hash": "f20a188209524e8fd1692faa3d3450cd075bb45f2962693371867cf166456dc1", "model": "default", "page": 106}, {"hash": "2d4dbf9c96c18bffaeb3b1bd321acea187066e968dd034c585a81a547f4c93c1", "model": "default", "page": 107}, {"hash": "0ef40f53d56676acaf1aef17676d06262391f04c8277eb1ba32ab7ca5d97e875", "model": "default", "page": 108}, {"hash": "25dbff770b7e10a2a2e2668b2f2977d99ed53ed37d3390e1f89d9245abf83e72", "model": "default", "page": 109}, {"hash": "2572c0b17f240729b504355e11e0d2009a92925a1faaa7b66aea649dc59d7905", "model": "default", "page": 110}, {"hash": "a3e79679ca89ec169e9967808ff8b3f9c2c2db25c113cb68c3f3a993eef15408", "model": "default", "page": 111}, {"hash": "5a47310eb886fad70101ea30ef05dee49cbda1d8a7e2446c3c61b66b3f634039", "model": "default", "page": 112}, {"hash": "992b747ebf8d366fcc11d36599c33ed004584f000855942db59e5a30dd625c7c", "model": "default", "page": 113}, {"hash": "f0bb099090288d2d8c2dad45a22598a924b5c8c3b739206496022a8985d56e25", "model": "default", "page": 114}, {"hash": "5d4e2ca3c369a87ae1732a86f0553fe650005db4637a792963f02fee28a3f1dd", "model": "default", "page": 115}, {"hash": 
"d23e9d367ce0fa476a6c89009c6fc6c8dd8e15dac6c21b1457a87c8ea89fc6ab", "model": "default", "page": 116}, {"hash": "2ed8bcad41539c0196738efdced854e4c0c11736a062c2bb382517307308315f", "model": "default", "page": 117}, {"hash": "a6d6fd7589a6dddaea1ae0ee683f34ba67d229ad1489d43cd55ab4bfa0a09e48", "model": "default", "page": 118}, {"hash": "4373bdfba2b9cb9f431054a081bcdbb9fde02a2a7c555237105645fc7c4300c6", "model": "default", "page": 119}, {"hash": "b9ba9a2d9c6e8fae2ae668710eb75f4e32a1debfca93371c7d2b12c849bd22da", "model": "default", "page": 120}, {"hash": "f0ac55799e80466c2f68c00232e96f16c893b304c5af92380071564bfd79cc2f", "model": "default", "page": 121}, {"hash": "a619cca5375467d6cbf87c25836da41e5a09dcab342c685b34539dd82fe86989", "model": "default", "page": 122}, {"hash": "d5eb13189c1badbc8317352c3077a84871640f1c42ba8d544f2b66e9788940b4", "model": "default", "page": 123}, {"hash": "5328248231376143041b9f94792b736e39d597c55126949b59362f6464ea0a04", "model": "default", "page": 124}, {"hash": "5201845b41de7b7c02c15934aa48093d9c3b7dd783a32f1f6887d16ab27736fd", "model": "default", "page": 125}, {"hash": "53ef8bd7beea5d3619cc02586077a54911c327d5b912872da834d7e26cbddda7", "model": "default", "page": 126}, {"hash": "eb5a30dbe63c79925f80db77000a9ae325904111ec3a76d12f0eabe9ea8184b5", "model": "default", "page": 127}, {"hash": "ea0c7446fc6d2d362e73d4581e7b8ad4608d1a569eaf7728b2565e9a62bfacc2", "model": "default", "page": 128}, {"hash": "ce7040d1ddf6c4ad312a07c56ce385cc338cb6dad98a350a3145fa651df24e10", "model": "default", "page": 129}, {"hash": "a59661e9111d2f306b39d51a1d1c2b60fafa5a0053a15e5c4df080974b4b9c8e", "model": "default", "page": 130}, {"hash": "e0eebbd57c73414b07cd40507f8b0dc3e30b7621a4da103a1b11b98178d614da", "model": "default", "page": 131}, {"hash": "663d5c537942f854d04a288e7cddc273cb931a1671b07345cf6fbd87593e6960", "model": "default", "page": 132}, {"hash": "ee15d566c88e74395f5c9cf500a25235527c226a22ac85bd940113a29690fcd3", "model": "default", "page": 133}, {"hash": 
"16dcf411e2a595080c73aa2c3aac658c7ea34947642e9f5d74b30637a8232ba0", "model": "default", "page": 134}, {"hash": "d06b834379d4d7edede6ad45cab9324d8ed03f6553a6ace9eef8ee2911517eae", "model": "default", "page": 135}, {"hash": "f39abd05ea9ae74cdd31f3fe7fc2cafb94364c90ff8f85b38fd763e0b4f00492", "model": "default", "page": 136}, {"hash": "c8cc8d0266caeb8d3547582e443238d020cc2b89b9b0a27881fa53a2d53eb373", "model": "default", "page": 137}, {"hash": "5df7c7769a47c31ede50376223cd8c64a630f146185eabfd69e6def4904d11e9", "model": "default", "page": 138}, {"hash": "752a8ff175ffefd5467eb28072d1ae016e4f2d121a42de192874c1314d8782af", "model": "default", "page": 139}, {"hash": "80196ef5402921f88f9a620eecc70cd40660a88bc53f0d7b41932ef750af8cf8", "model": "default", "page": 140}, {"hash": "e0675b1f0bfe007f57df25c89b6606a7fb711a9a2aea0b6ab3ed7f0c344938d9", "model": "default", "page": 141}, {"hash": "34c60aca3232bf01b5bcc0d4f745ecba5742a056e7cd56e78e733d27165319f5", "model": "default", "page": 142}, {"hash": "8add7158d438c17581bf11a58d377832b87438adddd357fc1df9627a01bb050c", "model": "default", "page": 143}, {"hash": "c6bfbf013724102c875b7177a50d9eeebd48325dc2c1ff163e018a5d86b4b638", "model": "default", "page": 144}, {"hash": "6272edb80b7baf8c345cdc69fd8b613712da5cca430baeee8b2bf74383b20940", "model": "default", "page": 145}, {"hash": "637ac3e09c925390e82504f989601641999e308491f5cd0cd8db2a22021a5412", "model": "default", "page": 146}]}, "main-text": [{"prov": [{"bbox": [287.82000732421875, 741.251953125, 418.83355712890625, 763.4519653320312], "page": 1, "span": [0, 11], "__ref_s3_data": null}], "text": "Front cover", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"prov": [{"bbox": [35.70000076293945, 625.8219604492188, 584.6428833007812, 709.2680053710938], "page": 1, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": 
"Section-header", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"prov": [{"bbox": [36.900001525878906, 26.895000457763672, 164.45849609375, 42.13602828979492], "page": 1, "span": [0, 17], "__ref_s3_data": null}], "text": "ibm.com /redbooks", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"prov": [{"bbox": [191.8931884765625, 706.8230590820312, 468.1595153808594, 720.9096069335938], "page": 3, "span": [0, 44], "__ref_s3_data": null}], "text": "International Technical Support Organization", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [191.5712432861328, 659.2655639648438, 551.7711181640625, 688.3182373046875], "page": 3, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [191.92127990722656, 629.265869140625, 290.98956298828125, 642.7371215820312], "page": 3, "span": [0, 13], "__ref_s3_data": null}], "text": "November 2014", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [479.2291259765625, 27.93828010559082, 547.263671875, 38.04776382446289], "page": 3, "span": [0, 12], "__ref_s3_data": null}], "text": "REDP-5110-00", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [70.37338256835938, 680.7003173828125, 511.2250671386719, 703.3181762695312], "page": 4, "span": [0, 111], "__ref_s3_data": null}], "text": "Note: Before using this information and the product it supports, read the information in \"Notices\" on page vii.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.45094299316406, 96.07437896728516, 206.09754943847656, 106.79737091064453], "page": 4, "span": [0, 29], "__ref_s3_data": null}], "text": "First Edition (November 2014)", "type": 
"subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.08177947998047, 73.64718627929688, 422.2424621582031, 83.91992950439453], "page": 4, "span": [0, 80], "__ref_s3_data": null}], "text": "This edition applies to Version 7, Release 2 of IBM i (product number 5770-SS1).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.635929107666016, 44.85982894897461, 426.39117431640625, 54.95832443237305], "page": 4, "span": [0, 82], "__ref_s3_data": null}], "text": "' Copyright International Business Machines Corporation 2014. All rights reserved.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.18267822265625, 23.176387786865234, 547.2008666992188, 43.96644592285156], "page": 4, "span": [0, 136], "__ref_s3_data": null}], "text": "Note to U.S. Government Users Restricted Rights -- Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 695.9519653320312, 168.73440551757812, 718.7908325195312], "page": 5, "span": [0, 8], "__ref_s3_data": null}], "text": "Contents", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [63.926761627197266, 27.811120986938477, 257.24334716796875, 37.25619888305664], "page": 5, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [538.4729614257812, 27.93828010559082, 547.25927734375, 38.0196647644043], "page": 5, "span": [0, 3], "__ref_s3_data": null}], "text": "iii", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.56709289550781, 27.93828010559082, 75.64199829101562, 37.95931625366211], "page": 6, "span": [0, 2], "__ref_s3_data": null}], "text": "iv", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [90.20014190673828, 27.85855484008789, 331.77874755859375, 37.22001647949219], "page": 6, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/1"}, {"name": "Table", "type": "table", "$ref": "#/tables/2"}, {"prov": [{"bbox": [488.2200012207031, 28.136999130249023, 529.1115112304688, 37.02998352050781], "page": 7, "span": [0, 8], "__ref_s3_data": null}], "text": "Contents", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.4024658203125, 27.93828010559082, 547.3956298828125, 37.15127944946289], "page": 7, "span": [0, 1], "__ref_s3_data": null}], "text": "v", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.29622650146484, 27.93828010559082, 75.64199829101562, 37.651676177978516], "page": 8, "span": [0, 2], "__ref_s3_data": null}], "text": "vi", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [90.30646514892578, 27.79586410522461, 331.6808776855469, 37.322059631347656], "page": 8, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.5048065185547, 718.7636108398438], "page": 9, "span": 
[0, 7], "__ref_s3_data": null}], "text": "Notices", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.18147277832031, 649.8180541992188, 413.7007141113281, 660.0758666992188], "page": 9, "span": [0, 78], "__ref_s3_data": null}], "text": "This information was developed for products and services offered in the U.S.A.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.14546966552734, 579.6738891601562, 547.235595703125, 640.0175170898438], "page": 9, "span": [0, 625], "__ref_s3_data": null}], "text": "IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.0940933227539, 540.159912109375, 547.2992553710938, 570.1964721679688], "page": 9, "span": [0, 232], "__ref_s3_data": null}], "text": "IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. 
You can send license inquiries, in writing, to:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.593505859375, 529.7247314453125, 489.1996154785156, 540.0978393554688], "page": 9, "span": [0, 92], "__ref_s3_data": null}], "text": "IBM Director of Licensing, IBM Corporation, North Castle Drive, Armonk, NY 10504-1785 U.S.A.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.16057586669922, 459.4730224609375, 547.1917114257812, 520.091796875], "page": 9, "span": [0, 541], "__ref_s3_data": null}], "text": "The following paragraph does not apply to the United Kingdom or any other country where such provisions are inconsistent with local law: INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some states do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.943748474121094, 410.14208984375, 547.2783813476562, 449.93365478515625], "page": 9, "span": [0, 345], "__ref_s3_data": null}], "text": "This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. 
IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.966217041015625, 369.6625671386719, 539.7974243164062, 400.06964111328125], "page": 9, "span": [0, 286], "__ref_s3_data": null}], "text": "Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.32443237304688, 339.65264892578125, 547.1986694335938, 360.1954650878906], "page": 9, "span": [0, 135], "__ref_s3_data": null}], "text": "IBM may use or distribute any of the information you supply in any way it believes appropriate without incurring any obligation to you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.14064025878906, 269.77093505859375, 544.1587524414062, 329.7679443359375], "page": 9, "span": [0, 526], "__ref_s3_data": null}], "text": "Any performance data contained herein was determined in a controlled environment. Therefore, the results obtained in other operating environments may vary significantly. Some measurements may have been made on development-level systems and there is no guarantee that these measurements will be the same on generally available systems. Furthermore, some measurements may have been estimated through extrapolation. Actual results may vary. 
Users of this document should verify the applicable data for their specific environment.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.13702392578125, 219.69473266601562, 547.231689453125, 259.8896789550781], "page": 9, "span": [0, 408], "__ref_s3_data": null}], "text": "Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.02989196777344, 169.76266479492188, 545.7865600585938, 209.7733154296875], "page": 9, "span": [0, 359], "__ref_s3_data": null}], "text": "This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to the names and addresses used by an actual business enterprise is entirely coincidental.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.42018127441406, 150.16415405273438, 172.49951171875, 160.39039611816406], "page": 9, "span": [0, 18], "__ref_s3_data": null}], "text": "COPYRIGHT LICENSE:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.03350067138672, 79.5408706665039, 547.2437744140625, 140.08206176757812], "page": 9, "span": [0, 619], "__ref_s3_data": null}], "text": "This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. 
You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.92543411254883, 27.7843074798584, 257.24334716796875, 37.34343719482422], "page": 9, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.465576171875, 27.93828010559082, 547.250244140625, 37.77464294433594], "page": 9, "span": [0, 3], "__ref_s3_data": null}], "text": "vii", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.19252014160156, 706.0162963867188, 154.14569091796875, 721.5706787109375], "page": 10, "span": [0, 10], "__ref_s3_data": null}], "text": "Trademarks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.04251861572266, 629.2591552734375, 547.2604370117188, 689.3146362304688], "page": 10, "span": [0, 591], "__ref_s3_data": null}], "text": "IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation in the United States, other countries, or both. These and other IBM trademarked terms are marked on their first occurrence in this information with the appropriate symbol (fi or \u2122), indicating US registered or common law trademarks owned by IBM at the time this information was published. Such trademarks may also be registered or common law trademarks in other countries. 
A current list of IBM trademarks is available on the Web at http://www.ibm.com/legal/copytrade.shtml", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.07420349121094, 599.2596435546875, 546.6150512695312, 619.2008666992188], "page": 10, "span": [0, 133], "__ref_s3_data": null}], "text": "The following terms are trademarks of the International Business Machines Corporation in the United States, other countries, or both:", "type": "paragraph", "name": "Text", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/3"}, {"prov": [{"bbox": [64.15382385253906, 537.2783203125, 311.9006652832031, 547.204833984375], "page": 10, "span": [0, 54], "__ref_s3_data": null}], "text": "The following terms are trademarks of other companies:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.90792465209961, 507.27880859375, 509.53704833984375, 527.1090698242188], "page": 10, "span": [0, 117], "__ref_s3_data": null}], "text": "Windows, and the Windows logo are trademarks of Microsoft Corporation in the United States, other countries, or both.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.3842544555664, 486.98126220703125, 464.51568603515625, 497.27496337890625], "page": 10, "span": [0, 86], "__ref_s3_data": null}], "text": "Other company, product, or service names may be trademarks or service marks of others.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.940345764160156, 26.91827964782715, 81.16200256347656, 36.210243225097656], "page": 10, "span": [0, 4], "__ref_s3_data": null}], "text": "viii", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [95.68927764892578, 26.413494110107422, 337.0337829589844, 36.1352424621582], "page": 10, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": 
[64.80000305175781, 706.416015625, 235.86239624023438, 717.5160522460938], "page": 11, "span": [0, 30], "__ref_s3_data": null}], "text": "DB2 for i Center of Excellence", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [93.55310821533203, 636.66357421875, 234.06729125976562, 654.3007202148438], "page": 11, "span": [0, 52], "__ref_s3_data": null}], "text": "Solution Brief IBM Systems Lab Services and Training", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [144.47474670410156, 454.5254211425781, 188.74681091308594, 464.9404296875], "page": 11, "span": [0, 10], "__ref_s3_data": null}], "text": "Highlights", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [144.74562072753906, 433.3105773925781, 242.87388610839844, 447.85009765625], "page": 11, "span": [0, 532], "__ref_s3_data": null}], "text": "GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [144.467529296875, 402.7626953125, 259.22869873046875, 425.5424499511719], "page": 11, "span": [0, 876], "__ref_s3_data": null}], "text": "GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [144.52346801757812, 379.9961242675781, 249.8356170654297, 394.7245788574219], "page": 
11, "span": [0, 672], "__ref_s3_data": null}], "text": "GLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [144.7223358154297, 357.3323669433594, 234.2516326904297, 371.9924011230469], "page": 11, "span": [0, 613], "__ref_s3_data": null}], "text": "GLYPHGLYPH GLYPH GLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPH GLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH GLYPHGLYPHGLYPH GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "List-item", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"prov": [{"bbox": [460.6785583496094, 646.5781860351562, 506.2869873046875, 653.638916015625], "page": 11, "span": [0, 14], "__ref_s3_data": null}], "text": "Power Services", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [280.1233215332031, 515.3794555664062, 463.8094177246094, 554.3141479492188], "page": 11, "span": [0, 30], "__ref_s3_data": null}], "text": "DB2 for i Center of Excellence", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [279.622314453125, 503.428466796875, 483.57049560546875, 514.8067626953125], "page": 11, "span": [0, 49], "__ref_s3_data": null}], "text": "Expert help to achieve your business requirements", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [279.9364929199219, 467.1043395996094, 443.2821044921875, 476.8785095214844], "page": 11, "span": [0, 37], "__ref_s3_data": null}], "text": "We build confident, 
satisfied clients", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [279.7645568847656, 446.6058044433594, 488.1546630859375, 464.781982421875], "page": 11, "span": [0, 122], "__ref_s3_data": null}], "text": "No one else has the vast consulting experiences, skills sharing and renown service offerings to do what we can do for you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [280.2401123046875, 427.2699890136719, 367.8602294921875, 435.3384704589844], "page": 11, "span": [0, 27], "__ref_s3_data": null}], "text": "Because no one else is IBM.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [279.7528381347656, 366.48248291015625, 500.4913024902344, 415.5780944824219], "page": 11, "span": [0, 318], "__ref_s3_data": null}], "text": "With combined experiences and direct access to development groups, we're the experts in IBM DB2\u00ae for i. The DB2 for i Center of Excellence (CoE) can help you achieve-perhaps reexamine and exceed-your business requirements and gain more confidence and satisfaction in IBM product data management products and solutions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [279.6987609863281, 345.1319274902344, 435.1271667480469, 354.7207946777344], "page": 11, "span": [0, 30], "__ref_s3_data": null}], "text": "Who we are, some of what we do", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [279.5264587402344, 334.4953918457031, 434.56317138671875, 342.8038024902344], "page": 11, "span": [0, 46], "__ref_s3_data": null}], "text": "Global CoE engagements cover topics including:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [280.1374206542969, 315.2233581542969, 401.9148254394531, 323.6479187011719], "page": 11, "span": [0, 38], "__ref_s3_data": null}], "text": "r Database performance and scalability", "type": "paragraph", "name": "List-item", "font": null}, {"prov": 
[{"bbox": [279.9528503417969, 304.53717041015625, 424.9964599609375, 313.6093444824219], "page": 11, "span": [0, 44], "__ref_s3_data": null}], "text": "r Advanced SQL knowledge and skills transfer", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [280.2003479003906, 295.0150451660156, 392.3099060058594, 302.7135314941406], "page": 11, "span": [0, 37], "__ref_s3_data": null}], "text": "r Business intelligence and analytics", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [280.0979919433594, 284.3857421875, 339.94354248046875, 292.44427490234375], "page": 11, "span": [0, 15], "__ref_s3_data": null}], "text": "r DB2 Web Query", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [279.9316101074219, 274.4402160644531, 504.1931457519531, 282.7410583496094], "page": 11, "span": [0, 72], "__ref_s3_data": null}], "text": "r Query/400 modernization for better reporting and analysis capabilities", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [279.8770751953125, 263.43438720703125, 423.13360595703125, 271.96844482421875], "page": 11, "span": [0, 43], "__ref_s3_data": null}], "text": "r Database modernization and re-engineering", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [280.0404968261719, 253.90310668945312, 400.11041259765625, 261.8250732421875], "page": 11, "span": [0, 38], "__ref_s3_data": null}], "text": "r Data-centric architecture and design", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [280.1170959472656, 243.8183135986328, 467.3244323730469, 252.07838439941406], "page": 11, "span": [0, 58], "__ref_s3_data": null}], "text": "r Extremely large database and overcoming limits to growth", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [279.9450988769531, 234.0165557861328, 382.3848876953125, 241.8405303955078], "page": 11, "span": [0, 30], "__ref_s3_data": null}], 
"text": "r ISV education and enablement", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [93.4457015991211, 623.955322265625, 193.95431518554688, 633.6002197265625], "page": 12, "span": [0, 19], "__ref_s3_data": null}], "text": "What you can expect", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [93.44943237304688, 613.8342895507812, 283.61541748046875, 622.0606079101562], "page": 12, "span": [0, 59], "__ref_s3_data": null}], "text": "Depending on the engagement, our team of consultants offer:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [93.83878326416016, 593.75537109375, 243.2284698486328, 602.1784057617188], "page": 12, "span": [0, 46], "__ref_s3_data": null}], "text": "r Briefings, consulting and guidance on demand", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [93.76670837402344, 574.0309448242188, 282.7251281738281, 592.1212158203125], "page": 12, "span": [0, 75], "__ref_s3_data": null}], "text": "r Illumination of the DB2 for i capabilities and leadership to exploit them", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [93.70116424560547, 543.4135131835938, 274.4058532714844, 571.626708984375], "page": 12, "span": [0, 121], "__ref_s3_data": null}], "text": "r Analysis and remediation of performance and scalability issues caused by inefficient database design and implementation", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [93.66458129882812, 522.5725708007812, 285.813232421875, 541.2940063476562], "page": 12, "span": [0, 97], "__ref_s3_data": null}], "text": "r Configuration of systems, operating system and products to fully leverage database capabilities", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [93.60704040527344, 499.23284912109375, 179.39649963378906, 508.8530578613281], "page": 12, "span": [0, 19], "__ref_s3_data": null}], 
"text": "Key client benefits", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [93.4247055053711, 438.3083190917969, 282.49176025390625, 503.74200439453125], "page": 12, "span": [0, 316], "__ref_s3_data": null}], "text": "T Gain greater database and application performance within your current environment. Achieve greater productivity in the development and maintenance of database and applications using modern techniques. Architect and design data structures to accommodate and benefit from business analytics (BA) tools and processes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [93.71710968017578, 416.9537353515625, 192.12144470214844, 425.96875], "page": 12, "span": [0, 20], "__ref_s3_data": null}], "text": "For more information", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [93.525146484375, 386.6142883300781, 274.9092102050781, 414.7449035644531], "page": 12, "span": [0, 165], "__ref_s3_data": null}], "text": "Pricing depends on the scope of work. Learn more about the DB2 for i Center of Excellence and other related products and services. 
Contact stgls@us.ibm.com or visit:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [93.6114730834961, 366.9930725097656, 216.5275421142578, 374.27313232421875], "page": 12, "span": [0, 298], "__ref_s3_data": null}], "text": "ibm.com GLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPHGLYPH", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"prov": [{"bbox": [309.5734558105469, 575.8102416992188, 409.8566589355469, 583.2079467773438], "page": 12, "span": [0, 32], "__ref_s3_data": null}], "text": "\u00a9 Copyright IBM Corporation 2013", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.6012878417969, 561.0743408203125, 358.2181701660156, 567.9740600585938], "page": 12, "span": [0, 15], "__ref_s3_data": null}], "text": "IBM Corporation", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.8481140136719, 553.4358520507812, 338.6964416503906, 559.6707153320312], "page": 12, "span": [0, 9], "__ref_s3_data": null}], "text": "Route 100", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.67144775390625, 545.79736328125, 361.2178039550781, 552.7429809570312], "page": 12, "span": [0, 16], "__ref_s3_data": null}], "text": "Somers, NY 10589", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.4100341796875, 522.8818969726562, 420.7811584472656, 537.3163452148438], "page": 12, "span": [0, 51], "__ref_s3_data": null}], "text": "Produced in the United States of America March 2013", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.1656494140625, 470.847412109375, 505.15802001953125, 516.7039794921875], "page": 12, "span": [0, 367], "__ref_s3_data": null}], "text": "IBM, the IBM logo, ibm.com, DB2 and Power Systems are trademarks of International Business Machines Corp., registered 
in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the web at \"Copyright and trademark information\" at www.ibm.com/legal/ copytrade.shtml .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.3301696777344, 450.2502136230469, 500.2719421386719, 464.6746520996094], "page": 12, "span": [0, 101], "__ref_s3_data": null}], "text": "This document is current as of the initial date of publication and may be changed by IBM at any time.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [309.0384826660156, 436.8582763671875, 494.2660827636719, 443.8564758300781], "page": 12, "span": [0, 71], "__ref_s3_data": null}], "text": "Not all offerings are available in every country in which IBM operates.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"prov": [{"bbox": [333.6021728515625, 421.63079833984375, 375.9269104003906, 428.56036376953125], "page": 12, "span": [0, 14], "__ref_s3_data": null}], "text": "Please Recycle", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [446.13677978515625, 118.22988891601562, 505.1431579589844, 125.14271545410156], "page": 12, "span": [0, 16], "__ref_s3_data": null}], "text": "QLS12392-USEN-00", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.46160888671875, 718.642333984375], "page": 13, "span": [0, 7], "__ref_s3_data": null}], "text": "Preface", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.77647399902344, 590.1392822265625, 547.3082275390625, 660.1563720703125], "page": 13, "span": [0, 469], "__ref_s3_data": null}], "text": "This IBMfi Redpaper\u2122 publication provides information about the IBM i 7.2 feature of IBM DB2fi for i Row and Column Access Control (RCAC). 
It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9678192138672, 531.8663330078125, 546.4656982421875, 577.9606323242188], "page": 13, "span": [0, 309], "__ref_s3_data": null}], "text": "This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.05535888671875, 449.6070251464844, 547.2366943359375, 472.15496826171875], "page": 13, "span": [0, 172], "__ref_s3_data": null}], "text": "This paper was produced by the IBM DB2 for i Center of Excellence team in partnership with the International Technical Support Organization (ITSO), Rochester, Minnesota US.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"prov": [{"bbox": [64.00379180908203, 27.771316528320312, 257.24334716796875, 37.35597229003906], "page": 13, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [538.0973510742188, 27.93828010559082, 547.2503051757812, 37.66927719116211], "page": 13, "span": [0, 2], "__ref_s3_data": null}], "text": "xi", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [262.83331298828125, 275.1402587890625, 541.2507934570312, 417.39556884765625], "page": 13, "span": [0, 684], "__ref_s3_data": null}], "text": "Jim Bainbridge is a senior DB2 consultant on the DB2 for i Center of Excellence team in the IBM Lab Services and Training organization. His primary role is training and implementation services for IBM DB2 Web Query for i and business analytics. Jim began his career with IBM 30 years ago in the IBM Rochester Development Lab, where he developed cooperative processing products that paired IBM PCs with IBM S/36 and AS/.400 systems. In the years since, Jim has held numerous technical roles, including independent software vendors technical support on a broad range of IBM technologies and products, and supporting customers in the IBM Executive Briefing Center and IBM Project Office.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [262.7664794921875, 111.162841796875, 541.2737426757812, 265.5693664550781], "page": 13, "span": [0, 726], "__ref_s3_data": null}], "text": "Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Rochester, Minnesota. He writes extensively and teaches IBM classes worldwide in all areas of DB2 for i. Before joining STG Lab Services, he worked in the ITSO for nine years writing multiple IBM Redbooksfi publications. He also worked for IBM Colombia as an IBM AS/400fi IT Specialist doing presales support for the Andean countries. He has 28 years of experience in the computing field and has taught database classes in Colombian universities. He holds a Master's degree in Computer Science from EAFIT, Colombia. 
His areas of expertise are database technology, performance, and data warehousing. Hernando can be contacted at hbedoya@us.ibm.com .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.30328369140625, 488.9364013671875, 125.36660766601562, 504.30010986328125], "page": 13, "span": [0, 7], "__ref_s3_data": null}], "text": "Authors", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.2465591430664, 27.93828010559082, 78.4020004272461, 37.6812858581543], "page": 14, "span": [0, 3], "__ref_s3_data": null}], "text": "xii", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39225006103516, 27.678035736083984, 334.4214172363281, 37.328025817871094], "page": 14, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/13"}, {"prov": [{"bbox": [262.9754943847656, 657.044921875, 541.2052612304688, 715.2725219726562], "page": 14, "span": [0, 271], "__ref_s3_data": null}], "text": "Rob Bestgen is a member of the DB2 for i Center of Excellence team helping customers use the capabilities of DB2 for i. In addition, Rob is the chief architect of the DB2 SQL Query Engine (SQE) for DB2 for i and is the product development manager for DB2 Web Query for i.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [262.9455871582031, 457.75616455078125, 541.171630859375, 599.965576171875], "page": 14, "span": [0, 671], "__ref_s3_data": null}], "text": "Mike Cain is a Senior Technical Staff Member within the IBM Systems and Technology Group. 
He is also the founder and team leader of the DB2 for i Center of Excellence in Rochester, Minnesota US. Before his current position, he worked as an IBM AS/400 Systems Engineer and technical consultant. Before joining IBM in 1988, Mike worked as a System/38 programmer and data processing manager for a property and casualty insurance company. Mike has 26 years of experience with IBM, engaging clients and Business Partners around the world. In addition to assisting clients, he uses his knowledge and experience to influence the IBM solution, development, and support processes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [262.6852111816406, 341.8028259277344, 541.3219604492188, 447.9212951660156], "page": 14, "span": [0, 494], "__ref_s3_data": null}], "text": "Dan Cruikshank has been an IT Professional since 1972. He has consulted on a number of different project areas since joining IBM Rochester in 1988. Since 1993, Dan was focused primarily on resolving IBM System ifi application and database performance issues at several IBM customer accounts. Since 1998, Dan has been one of the primary instructors for the Database Optimization Workshop. Most recently, Dan is a member of the DB2 for i Center of Excellence team with IBM Rochester Lab Services.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [262.8884582519531, 187.7853546142578, 541.2412719726562, 330.1932067871094], "page": 14, "span": [0, 702], "__ref_s3_data": null}], "text": "Jim Denton is a senior consultant at the IBM DB2 for i Center of Excellence, where his responsibilities include both teaching courses and hands on consulting. Jim specializes in SQL performance, data-centric programming, and database modernization. Jim started his IBM career in 1981 as an S/38 operating system programmer. 
Before joining the consulting team, his key assignments included 10 years as a systems performance specialist, five years as the lead \"JDE on i\" analyst, three years as a consultant at the IBM Benchmark and Briefing Center in Montpellier France, and a total of 11 years as an operating system developer, including five years designing and implementing enhancements to DB2 for i.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [263.0142517089844, 83.7471923828125, 541.1943969726562, 177.92990112304688], "page": 14, "span": [0, 420], "__ref_s3_data": null}], "text": "Doug Mack is a DB2 for i and Business Intelligence Consultant in the IBM Power Systems\u2122 Lab Services organization. Doug's 30+ year career with IBM spans many roles, including product development, technical sales support, Business Intelligence Sales Specialist, and DB2 for i Product Marketing Manager. Doug is a featured speaker at User Group conferences and meetings, IBM Technical Conferences, and Executive Briefings.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"prov": [{"bbox": [262.9324951171875, 584.8549194335938, 541.1551513671875, 715.4559936523438], "page": 15, "span": [0, 608], "__ref_s3_data": null}], "text": "Tom McKinley is an IBM Lab Services Consultant working on DB2 for IBM i in Rochester MN. His main focus is complex query performance that is associated with Business Intelligence running on Very Large Databases. He worked as a developer or performance analyst in the DB area from 1986 until 2006. Some of his major pieces of work include the Symmetric Multiple processing capabilities of DB2 for IBM i and Large Object Data types. In addition, he was on the original team that designed and built the SQL Query Engine. 
Before his database work, he worked on Licensed Internal Code for System 34 and System 36.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [262.8179016113281, 481.3031005859375, 541.2665405273438, 575.6270141601562], "page": 15, "span": [0, 436], "__ref_s3_data": null}], "text": "Kent Milligan is a senior DB2 consultant on the DB2 for i Center of Excellence team within the IBM Lab Services and Training organization. His primary responsibility is helping software developers use the latest DB2 technologies and port applications from other databases to DB2 for i. After graduating from the University of Iowa, Kent spent the first eight years of his IBM career as a member of the DB2 development team in Rochester.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8874969482422, 442.6785888671875, 432.1602478027344, 453.3590087890625], "page": 15, "span": [0, 71], "__ref_s3_data": null}], "text": "Thanks to the following people for their contributions to this project:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.64984130859375, 421.29852294921875, 200.16773986816406, 431.2518615722656], "page": 15, "span": [0, 12], "__ref_s3_data": null}], "text": "Debra Landon", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.55917358398438, 408.904296875, 438.75390625, 419.7124328613281], "page": 15, "span": [0, 62], "__ref_s3_data": null}], "text": "International Technical Support Organization, Rochester Center", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.283203125, 375.0847473144531, 457.729736328125, 397.47100830078125], "page": 15, "span": [0, 97], "__ref_s3_data": null}], "text": "Craig Aldrich, Mark Anderson, Theresa Euler, Scott Forstie, Chad Olstad IBM Rochester Development", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 331.5992736816406, 413.1525573730469, 347.63641357421875], "page": 15, 
"span": [0, 43], "__ref_s3_data": null}], "text": "Now you can become a published author, too!", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9495849609375, 233.25941467285156, 547.232666015625, 315.36279296875], "page": 15, "span": [0, 551], "__ref_s3_data": null}], "text": "Here's an opportunity to spotlight your skills, grow your career, and become a published author-all at the same time! Join an ITSO residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.6442108154297, 194.49908447265625, 546.8164672851562, 221.5010223388672], "page": 15, "span": [0, 126], "__ref_s3_data": null}], "text": "Find out more about the residency program, browse the residency index, and apply online at: ibm.com /redbooks/residencies.html", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.77066040039062, 151.01637268066406, 219.43165588378906, 166.51051330566406], "page": 15, "span": [0, 16], "__ref_s3_data": null}], "text": "Comments welcome", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0441436767578, 123.81623840332031, 294.74969482421875, 134.08399963378906], "page": 15, "span": [0, 34], "__ref_s3_data": null}], "text": "Your comments are important to us!", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.76272583007812, 90.2786636352539, 547.25244140625, 112.70419311523438], "page": 15, "span": [0, 152], "__ref_s3_data": null}], "text": "We want our papers to be as helpful as possible. 
Send us your comments about this paper or other IBM Redbooks publications in one of the following ways:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5581817626953, 73.23909759521484, 412.55645751953125, 83.14573669433594], "page": 15, "span": [0, 72], "__ref_s3_data": null}], "text": "GLYPH Use the online Contact us review Redbooks form found at:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.20013427734375, 56.498329162597656, 231.11917114257812, 65.66831970214844], "page": 15, "span": [0, 17], "__ref_s3_data": null}], "text": "ibm.com /redbooks", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [485.1000061035156, 28.136999130249023, 520.88134765625, 37.25944519042969], "page": 15, "span": [0, 7], "__ref_s3_data": null}], "text": "Preface", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [532.9468994140625, 27.93828010559082, 547.1934204101562, 37.73838806152344], "page": 15, "span": [0, 4], "__ref_s3_data": null}], "text": "xiii", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.14579772949219, 27.93828010559082, 81.16200256347656, 37.55492401123047], "page": 16, "span": [0, 3], "__ref_s3_data": null}], "text": "xiv", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [95.48082733154297, 27.832721710205078, 337.05462646484375, 37.28497314453125], "page": 16, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.69615173339844, 710.8174438476562, 310.3739318847656, 721.4570922851562], "page": 16, "span": [0, 50], "__ref_s3_data": null}], "text": "GLYPH Send your comments in an email to:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.17933654785156, 694.5873413085938, 246.37371826171875, 704.374267578125], 
"page": 16, "span": [0, 19], "__ref_s3_data": null}], "text": "redbooks@us.ibm.com", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.74884033203125, 624.2797241210938, 426.992431640625, 686.70166015625], "page": 16, "span": [0, 173], "__ref_s3_data": null}], "text": "GLYPH Mail your comments to: IBM Corporation, International Technical Support Organization Dept. HYTD Mail Station P099 2455 South Road Poughkeepsie, NY 12601-5400", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.5674057006836, 581.0363159179688, 317.6510925292969, 597.2184448242188], "page": 16, "span": [0, 30], "__ref_s3_data": null}], "text": "Stay connected to IBM Redbooks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.58547973632812, 554.25830078125, 246.8371124267578, 563.9945678710938], "page": 16, "span": [0, 36], "__ref_s3_data": null}], "text": "GLYPH Find us on Facebook:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.32769775390625, 537.2778930664062, 326.0977478027344, 547.1971435546875], "page": 16, "span": [0, 35], "__ref_s3_data": null}], "text": "http://www.facebook.com/IBMRedbooks", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.44610595703125, 520.2987060546875, 241.52239990234375, 530.50732421875], "page": 16, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH Follow us on Twitter:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.69737243652344, 503.3732604980469, 301.0782165527344, 513.1410522460938], "page": 16, "span": [0, 30], "__ref_s3_data": null}], "text": "http://twitter.com/ibmredbooks", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.56719970703125, 486.2793273925781, 257.426513671875, 496.61798095703125], "page": 16, "span": [0, 40], "__ref_s3_data": null}], "text": "GLYPH Look for us on LinkedIn:", "type": "paragraph", "name": 
"List-item", "font": null}, {"prov": [{"bbox": [150.52149963378906, 469.2062072753906, 391.0767822265625, 479.30670166015625], "page": 16, "span": [0, 48], "__ref_s3_data": null}], "text": "http://www.linkedin.com/groups?home=&gid=2130806", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.64007568359375, 439.531982421875, 546.2178955078125, 461.6893615722656], "page": 16, "span": [0, 118], "__ref_s3_data": null}], "text": "GLYPH Explore new Redbooks publications, residencies, and workshops with the IBM Redbooks weekly newsletter:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.62750244140625, 423.0643005371094, 451.3365478515625, 433.34259033203125], "page": 16, "span": [0, 60], "__ref_s3_data": null}], "text": "https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.65512084960938, 406.300537109375, 429.3480529785156, 416.6412658691406], "page": 16, "span": [0, 76], "__ref_s3_data": null}], "text": "GLYPH Stay current on recent Redbooks publications with RSS Feeds:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.41917419433594, 389.44549560546875, 331.0777282714844, 398.970458984375], "page": 16, "span": [0, 36], "__ref_s3_data": null}], "text": "http://www.redbooks.ibm.com/rss.html", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 17, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 699.4268188476562], "page": 17, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [136.619384765625, 482.1217956542969, 
547.3047485351562, 538.820068359375], "page": 17, "span": [0, 36], "__ref_s3_data": null}], "text": "Securing and protecting IBM DB2 data", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.17431640625, 361.6726989746094, 547.2540283203125, 443.9345703125], "page": 17, "span": [0, 648], "__ref_s3_data": null}], "text": "Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0235595703125, 303.58026123046875, 527.206298828125, 349.85009765625], "page": 17, "span": [0, 304], "__ref_s3_data": null}], "text": "Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.7783660888672, 270.1002197265625, 547.1551513671875, 291.9642639160156], "page": 17, "span": [0, 122], "__ref_s3_data": null}], "text": "This chapter describes how you can secure and protect data in DB2 for i. 
The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.81155395507812, 252.7996368408203, 250.23167419433594, 263.56298828125], "page": 17, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH Security fundamentals", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.76536560058594, 240.873291015625, 283.06231689453125, 251.25155639648438], "page": 17, "span": [0, 47], "__ref_s3_data": null}], "text": "GLYPH Current state of IBM i security", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7914276123047, 229.06103515625, 264.8818664550781, 239.46493530273438], "page": 17, "span": [0, 43], "__ref_s3_data": null}], "text": "GLYPH DB2 for i security controls", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.72442626953125, 67.0481948852539, 258.362548828125, 77.52366638183594], "page": 17, "span": [0, 35], "__ref_s3_data": null}], "text": "$^{1 }$http://www.idtheftcenter.org", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [135.91265869140625, 56.83421325683594, 234.79055786132812, 66.7944107055664], "page": 17, "span": [0, 31], "__ref_s3_data": null}], "text": "$^{2 }$http://www.ponemon.org /", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [63.92715072631836, 27.736194610595703, 257.24334716796875, 37.3647346496582], "page": 17, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.6627807617188, 27.93828010559082, 547.2176513671875, 37.46987533569336], "page": 17, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.72772979736328, 702.0524291992188, 267.40582275390625, 718.4620361328125], "page": 18, "span": [0, 25], "__ref_s3_data": null}], "text": "1.1 Security fundamentals", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.1598358154297, 664.178466796875, 545.0048217773438, 685.9579467773438], "page": 18, "span": [0, 133], "__ref_s3_data": null}], "text": "Before reviewing database security techniques, there are two fundamental steps in securing information assets that must be described:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.834716796875, 610.8225708007812, 547.1642456054688, 657.5287475585938], "page": 18, "span": [0, 361], "__ref_s3_data": null}], "text": "GLYPH First, and most important, is the definition of a company's security policy . Without a security policy, there is no definition of what are acceptable practices for using, accessing, and storing information by who, what, when, where, and how. A security policy should minimally address three things: confidentiality, integrity, and availability.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [149.89341735839844, 521.58251953125, 547.2608642578125, 604.0130004882812], "page": 18, "span": [0, 587], "__ref_s3_data": null}], "text": "The monitoring and assessment of adherence to the security policy determines whether your security strategy is working. Often, IBM security consultants are asked to perform security assessments for companies without regard to the security policy. 
Although these assessments can be useful for observing how the system is defined and how data is being accessed, they cannot determine the level of security without a security policy. Without a security policy, it really is not an assessment as much as it is a baseline for monitoring the changes in the security settings that are captured.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.36105346679688, 505.0062561035156, 541.9920043945312, 515.3053588867188], "page": 18, "span": [0, 90], "__ref_s3_data": null}], "text": "A security policy is what defines whether the system and its settings are secure (or not).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.52206420898438, 415.70892333984375, 547.1582641601562, 497.90234375], "page": 18, "span": [0, 573], "__ref_s3_data": null}], "text": "GLYPH The second fundamental in securing data assets is the use of resource security . If implemented properly, resource security prevents data breaches from both internal and external intrusions. Resource security controls are closely tied to the part of the security policy that defines who should have access to what information resources. 
A hacker might be good enough to get through your company firewalls and sift his way through to your system, but if they do not have explicit access to your database, the hacker cannot compromise your information assets.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.8480987548828, 381.79827880859375, 535.3616943359375, 403.79864501953125], "page": 18, "span": [0, 179], "__ref_s3_data": null}], "text": "With your eyes now open to the importance of securing information assets, the rest of this chapter reviews the methods that are available for securing database resources on IBM i.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 338.5409240722656, 323.3839111328125, 354.3926086425781], "page": 18, "span": [0, 35], "__ref_s3_data": null}], "text": "1.2 Current state of IBM i security", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.07699584960938, 275.4822998046875, 547.3182373046875, 322.0619812011719], "page": 18, "span": [0, 306], "__ref_s3_data": null}], "text": "Because of the inherently secure nature of IBM i, many clients rely on the default system settings to protect their business data that is stored in DB2 for i. In most cases, this means no data protection because the default setting for the Create default public authority (QCRTAUT) system value is *CHANGE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.86489868164062, 206.1400604248047, 547.284423828125, 264.1254577636719], "page": 18, "span": [0, 405], "__ref_s3_data": null}], "text": "Even more disturbing is that many IBM i clients remain in this state, despite the news headlines and the significant costs that are involved with databases being compromised. This default security configuration makes it quite challenging to implement basic security policies. 
A tighter implementation is required if you really want to protect one of your company's most valuable assets, which is the data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89337158203125, 111.43904876708984, 547.2832641601562, 194.00531005859375], "page": 18, "span": [0, 640], "__ref_s3_data": null}], "text": "Traditionally, IBM i applications have employed menu-based security to counteract this default configuration that gives all users access to the data. The theory is that data is protected by the menu options controlling what database operations that the user can perform. This approach is ineffective, even if the user profile is restricted from running interactive commands. The reason is that in today's connected world there are a multitude of interfaces into the system, from web browsers to PC clients, that bypass application menus. If there are no object-level controls, users of these newer interfaces have an open door to your data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.08084869384766, 27.93828010559082, 72.8219985961914, 37.463680267333984], "page": 18, "span": [0, 1], "__ref_s3_data": null}], "text": "2", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.60530853271484, 27.763837814331055, 328.7811279296875, 37.33225631713867], "page": 18, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.91156005859375, 626.8826904296875, 546.4398193359375, 721.452392578125], "page": 19, "span": [0, 688], "__ref_s3_data": null}], "text": "Some clients using this default configuration have toughened their database security with exit-point solutions from third-party vendors. 
IBM i exit points allow a user-written program to be called every time that a particular interface (for example, FTP) is used or an event occurs (for example, a profile is created). Security tools that are based on these exit points increase the level of security on a system by locking down interfaces that are not under the control of menu-based or application authority. In addition, exit-point solutions allow clients to implement more granular security controls, such as allowing users access only to the database during certain hours of the day.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.64999389648438, 520.9066772460938, 547.2666015625, 615.0797729492188], "page": 19, "span": [0, 723], "__ref_s3_data": null}], "text": "Although exit-point solutions can provide great benefits, they are not an alternative to object-level control of your databases. Exit-point solutions help secure interfaces, but they do not completely protect the data that is stored in your DB2 objects. Exit points do not exist for every data access interface on the system. Therefore, if an application starts using an unprotected interface, the only thing protecting your data is object-level access control. When your security implementation totally relies on exit points, then it is also important to track any new data interfaces that appear as IBM delivers new releases and products to ensure that your exit-point solution provides coverage for those new interfaces.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.94229125976562, 462.9145812988281, 546.2266235351562, 509.42431640625], "page": 19, "span": [0, 331], "__ref_s3_data": null}], "text": "An exit-point solution is a good option for databases with security holes that are caused by a reliance on the default security setup or menu-based control. However, your security work should not stop there. 
Instead, you must continue to work on a complete database security solution by controlling data access at the object level.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 419.72991943359375, 295.2049255371094, 435.87127685546875], "page": 19, "span": [0, 31], "__ref_s3_data": null}], "text": "1.3 DB2 for i security controls", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.84832763671875, 333.2792053222656, 547.1688842773438, 403.44085693359375], "page": 19, "span": [0, 511], "__ref_s3_data": null}], "text": "As described in 1.2, \"Current state of IBM i security\" on page 2, object-level controls on your DB2 objects are a critical success factor in securing your business data. Although database object-level security is a strong security feature, some clients have found that object-level security does not have the granularity that is required to adhere to regulatory or compliance policies. A user that is granted object-level access to a DB2 table has the authority to view all of the rows and values in that table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.60513305664062, 310.94268798828125, 466.3186340332031, 321.458984375], "page": 19, "span": [0, 78], "__ref_s3_data": null}], "text": "As shown in Figure 1-1, it is an all-or-nothing access to the rows of a table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.33961486816406, 80.503662109375, 354.8553466796875, 89.81105041503906], "page": 19, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 1-1 All-or-nothing access to the rows of a table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"prov": [{"bbox": [328.4175109863281, 27.881893157958984, 529.1063232421875, 37.1580810546875], "page": 19, "span": [0, 47], "__ref_s3_data": null}], "text": "Chapter 1. 
Securing and protecting IBM DB2 data", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.21044921875, 27.93828010559082, 547.2176513671875, 37.613685607910156], "page": 19, "span": [0, 1], "__ref_s3_data": null}], "text": "3", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.39376831054688, 27.93828010559082, 72.8219985961914, 37.595481872558594], "page": 20, "span": [0, 1], "__ref_s3_data": null}], "text": "4", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.5719223022461, 27.820940017700195, 328.7376708984375, 37.318206787109375], "page": 20, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.9315185546875, 638.8145141601562, 544.3033447265625, 721.4989013671875], "page": 20, "span": [0, 589], "__ref_s3_data": null}], "text": "Many businesses are trying to limit data access to a need-to-know basis. This security goal means that users should be given access only to the minimum set of data that is required to perform their job. Often, users with object-level access are given access to row and column values that are beyond what their business task requires because that object-level security provides an all-or-nothing solution. For example, object-level controls allow a manager to access data about all employees. 
Most security policies limit a manager to accessing data only for the employees that they manage.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.61784362792969, 606.2153930664062, 301.4690246582031, 619.5537109375], "page": 20, "span": [0, 37], "__ref_s3_data": null}], "text": "1.3.1 Existing row and column control", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.62290954589844, 534.758544921875, 541.5673828125, 593.4801635742188], "page": 20, "span": [0, 377], "__ref_s3_data": null}], "text": "Some IBM i clients have tried augmenting the all-or-nothing object-level security with SQL views (or logical files) and application logic, as shown in Figure 1-2. However, application-based logic is easy to bypass with all of the different data access interfaces that are provided by the IBM i operating system, such as Open Database Connectivity (ODBC) and System i Navigator.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.88414001464844, 476.91424560546875, 547.4407958984375, 523.4513549804688], "page": 20, "span": [0, 340], "__ref_s3_data": null}], "text": "Using SQL views to limit access to a subset of the data in a table also has its own set of challenges. First, there is the complexity of managing all of the SQL view objects that are used for securing data access. 
Second, scaling a view-based security solution can be difficult as the amount of data grows and the number of users increases.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.26145935058594, 430.86553955078125, 547.232666015625, 465.4564514160156], "page": 20, "span": [0, 247], "__ref_s3_data": null}], "text": "Even if you are willing to live with these performance and management issues, a user with *ALLOBJ access still can directly access all of the data in the underlying DB2 table and easily bypass the security controls that are built into an SQL view.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1416778564453, 91.801513671875, 316.93792724609375, 101.39622497558594], "page": 20, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 1-2 Existing row and column controls", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"prov": [{"bbox": [64.80000305175781, 708.67724609375, 394.39227294921875, 721.4375], "page": 21, "span": [0, 49], "__ref_s3_data": null}], "text": "1.3.2 New controls: Row and Column Access Control", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.84750366210938, 649.1361083984375, 539.9359130859375, 695.3776245117188], "page": 21, "span": [0, 270], "__ref_s3_data": null}], "text": "Based on the challenges that are associated with the existing technology available for controlling row and column access at a more granular level, IBM delivered new security support in the IBM i 7.2 release; this support is known as Row and Column Access Control (RCAC).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.7799530029297, 567.0514526367188, 542.2587280273438, 637.3985595703125], "page": 21, "span": [0, 514], "__ref_s3_data": null}], "text": "The new DB2 RCAC support provides a method for controlling data access across all interfaces and all types of 
users with a data-centric solution. Moving security processing to the database layer makes it easier to build controls that meet your compliance policies. The RCAC support provides an additional layer of security that complements object-level authorizations to limit data access to a need-to-know basis. Therefore, it is critical that you first have a sound object-level security implementation in place.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [328.35748291015625, 27.924795150756836, 529.1063232421875, 37.201438903808594], "page": 21, "span": [0, 47], "__ref_s3_data": null}], "text": "Chapter 1. Securing and protecting IBM DB2 data", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.4357299804688, 27.93828010559082, 547.2176513671875, 37.7031135559082], "page": 21, "span": [0, 1], "__ref_s3_data": null}], "text": "5", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.66896057128906, 27.93828010559082, 72.8219985961914, 37.446083068847656], "page": 22, "span": [0, 1], "__ref_s3_data": null}], "text": "6", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.79338073730469, 27.71957778930664, 328.8094177246094, 37.3686637878418], "page": 22, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 23, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 2.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 23, "span": [0, 1], "__ref_s3_data": null}], "text": "2", "type": "page-header", "name": "Page-header", "font": null}, {"prov": [{"bbox": [136.8000030517578, 
512.864501953125, 515.1311645507812, 538.6773681640625], "page": 23, "span": [0, 30], "__ref_s3_data": null}], "text": "Roles and separation of duties", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.04757690429688, 441.0284118652344, 547.3323974609375, 475.5329895019531], "page": 23, "span": [0, 266], "__ref_s3_data": null}], "text": "One of the primary objectives of row and column access control (RCAC) is to create data security policies that control and govern user access to data and limit the data access of DB2 designers and administrators to only the minimum that is required to do their jobs.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.86463928222656, 383.07965087890625, 547.2574462890625, 429.3913269042969], "page": 23, "span": [0, 290], "__ref_s3_data": null}], "text": "To accomplish these tasks, RCAC engineers devised a set of functional roles that, as a group, implement effectively data access requirements and also limit the span of control of each role so that each role is given only the authorities that are needed to perform its specific set of tasks.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8092803955078, 349.0167541503906, 547.2655029296875, 371.5411071777344], "page": 23, "span": [0, 115], "__ref_s3_data": null}], "text": "This chapter describes the concepts of roles and separation of duties on DB2 for i and covers the following topics:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.91671752929688, 332.0804748535156, 176.71270751953125, 342.05181884765625], "page": 23, "span": [0, 21], "__ref_s3_data": null}], "text": "GLYPH Roles", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.62644958496094, 319.62664794921875, 239.9706268310547, 329.89959716796875], "page": 23, "span": [0, 36], "__ref_s3_data": null}], "text": "GLYPH Separation of duties", "type": "paragraph", "name": 
"List-item", "font": null}, {"prov": [{"bbox": [63.97868728637695, 27.77071189880371, 257.24334716796875, 37.37629318237305], "page": 23, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [540.9383544921875, 27.93828010559082, 547.2549438476562, 37.7381591796875], "page": 23, "span": [0, 1], "__ref_s3_data": null}], "text": "7", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.08222961425781, 702.8963012695312, 139.42576599121094, 718.1371459960938], "page": 24, "span": [0, 9], "__ref_s3_data": null}], "text": "2.1 Roles", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.79855346679688, 615.6634521484375, 547.2965087890625, 686.0182495117188], "page": 24, "span": [0, 543], "__ref_s3_data": null}], "text": "Traditionally, data access roles are defined in a binary way, where access to the data is either not permitted or access to the data is permitted. A full access capability can also be instantiated by the *ALLOBJ special authority, either explicitly or implicitly, for the security officer. If you hold the role of security officer, or have all *ALLOBJ special authority, you have access to all the data, with no exceptions. Unfortunately, this might not meet the organization's requirements for limiting access to data or separation of duties.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.95726013183594, 569.6976928710938, 547.2587890625, 603.8743286132812], "page": 24, "span": [0, 282], "__ref_s3_data": null}], "text": "To assist with defining roles and the separation of duties with appropriate authority, IBM i provides function usage IDs . 
A function usage ID implements granular security controls rather than granting users powerful special authorities, such as all object, job control, or service.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.39627075195312, 536.1390380859375, 547.3123779296875, 558.0423583984375], "page": 24, "span": [0, 95], "__ref_s3_data": null}], "text": "Roles are divided among the following DB2 functions and their corresponding function usage IDs:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.72689819335938, 519.0963745117188, 474.09234619140625, 529.4824829101562], "page": 24, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH DDM and IBM DRDAfi application server access: QIBM_DB_DDMDRDA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.54745483398438, 506.9635925292969, 375.98358154296875, 517.3781127929688], "page": 24, "span": [0, 62], "__ref_s3_data": null}], "text": "GLYPH Toolbox application server access: QIBM_DB_ZDA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6115264892578, 495.15960693359375, 391.5639953613281, 505.5315246582031], "page": 24, "span": [0, 63], "__ref_s3_data": null}], "text": "GLYPH Database Administrator function: QIBM_DB_SQLADM", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.43470764160156, 483.1597900390625, 383.83270263671875, 493.47198486328125], "page": 24, "span": [0, 61], "__ref_s3_data": null}], "text": "GLYPH Database Information function: QIBM_DB_SYSMON", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6698455810547, 471.068603515625, 385.5570983886719, 481.8918762207031], "page": 24, "span": [0, 63], "__ref_s3_data": null}], "text": "GLYPH Security Administrator function: QIBM_DB_SECADM", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.01905059814453, 438.3138732910156, 501.0556335449219, 
451.5218200683594], "page": 24, "span": [0, 61], "__ref_s3_data": null}], "text": "2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.79649353027344, 366.81915283203125, 547.2295532226562, 425.7265625], "page": 24, "span": [0, 392], "__ref_s3_data": null}], "text": "The QIBM_DB_DDMDRDA function usage ID restricts access to the DDM and DRDA application server (QRWTSRVR). This function usage ID provides an easy alternative (rather than writing an exit program) to control access to DDM and DRDA from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.77101135253906, 332.93585205078125, 534.9490356445312, 355.3209533691406], "page": 24, "span": [0, 121], "__ref_s3_data": null}], "text": "This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.1021957397461, 300.5572814941406, 413.2480773925781, 313.5638732910156], "page": 24, "span": [0, 52], "__ref_s3_data": null}], "text": "2.1.2 Toolbox application server access: QIBM_DB_ZDA", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9031524658203, 229.11941528320312, 543.151123046875, 287.4146728515625], "page": 24, "span": [0, 350], "__ref_s3_data": null}], "text": "The QIBM_DB_ZDA function usage ID restricts access to the optimized server that handles DB2 requests from clients (QZDASOINIT and QZDASSINIT). 
Server access is used by the ODBC, OLE DB, and .NET providers that ship with IBM i Access for Windows and JDBC Toolbox, Run SQL scripts, and other parts of System i Navigator and Navigator for i Web console.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.73422241210938, 170.7083740234375, 546.2078247070312, 217.42959594726562], "page": 24, "span": [0, 289], "__ref_s3_data": null}], "text": "This function usage ID provides an easy alternative (rather than writing an exit program) to control access to these functions from the server side. The function usage IDs ship with the default authority of *ALLOWED. The security officer can easily deny access to specific users or groups.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.910400390625, 136.91490173339844, 534.9490966796875, 159.28839111328125], "page": 24, "span": [0, 121], "__ref_s3_data": null}], "text": "This is an alternative to a User Exit Program approach. No coding is required, it is easy to change, and it is auditable.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.47502899169922, 27.93828010559082, 72.8219985961914, 37.502052307128906], "page": 24, "span": [0, 1], "__ref_s3_data": null}], "text": "8", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.709228515625, 27.739971160888672, 328.7253723144531, 37.31616973876953], "page": 24, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.19741821289062, 708.67724609375, 433.47052001953125, 721.6392211914062], "page": 25, "span": [0, 53], "__ref_s3_data": null}], "text": "2.1.3 Database Administrator function: QIBM_DB_SQLADM", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.8431854248047, 636.6989135742188, 547.0184326171875, 695.5401000976562], "page": 
25, "span": [0, 363], "__ref_s3_data": null}], "text": "The Database Administrator function (QIBM_DB_SQLADM) is needed whenever a user is analyzing and viewing SQL performance data. Some of the more common database administrator functions include displaying statements from the SQL Plan Cache, analyzing SQL Performance Monitors and SQL Plan Cache Snapshots, and displaying the SQL details of a job other than your own.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.77134704589844, 590.8430786132812, 547.3245239257812, 625.426025390625], "page": 25, "span": [0, 274], "__ref_s3_data": null}], "text": "The Database Administrator function provides an alternative to granting *JOBCTL, but simply having the Database Administrator authorization does not carry with it all the needed object authorities for every administration task. The default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.922607421875, 532.9749755859375, 541.2985229492188, 579.3148193359375], "page": 25, "span": [0, 352], "__ref_s3_data": null}], "text": "To perform database administrator tasks that are not related to performance analysis, you must refer to the details of the task to determine its specific authorization requirements. 
For example, to allow a database administrator to reorganize a table, the DBA must have additional object authorities to the table that are not covered by QIBM_DB_SQLADM.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.28472900390625, 505.07904052734375, 392.7084045410156, 517.3775024414062], "page": 25, "span": [0, 38], "__ref_s3_data": null}], "text": "Granting QIBM_DB_SQLADM function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.11285400390625, 480.2784729003906, 532.0657348632812, 502.6061706542969], "page": 25, "span": [0, 132], "__ref_s3_data": null}], "text": "Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Administration functions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.2179183959961, 447.6772766113281, 419.47637939453125, 460.86566162109375], "page": 25, "span": [0, 51], "__ref_s3_data": null}], "text": "2.1.4 Database Information function: QIBM_DB_SYSMON", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.95957946777344, 400.29864501953125, 547.1928100585938, 434.8374328613281], "page": 25, "span": [0, 190], "__ref_s3_data": null}], "text": "The Database Information function (QIBM_DB_SYSMON) provides much less authority than Database Administrator function. Its primary use allows a user to examine high-level database properties.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2222900390625, 354.16015625, 547.2994384765625, 388.70263671875], "page": 25, "span": [0, 229], "__ref_s3_data": null}], "text": "For example, a user that does not have *JOBCTL or QIBM_DB_SQLADM can still view the SQL Plan Cache properties if granted authority to QIBM_DB_SYSMON. 
Without granting this authority, the default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3820037841797, 326.02716064453125, 392.7384033203125, 338.0588684082031], "page": 25, "span": [0, 38], "__ref_s3_data": null}], "text": "Granting QIBM_DB_SYSMON function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.12899780273438, 301.23870849609375, 532.0657348632812, 323.3178405761719], "page": 25, "span": [0, 129], "__ref_s3_data": null}], "text": "Only the security administrator (*SECADM) is allowed to change the list of users that can perform Database Information functions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.09640502929688, 268.63739013671875, 427.0501403808594, 281.47467041015625], "page": 25, "span": [0, 53], "__ref_s3_data": null}], "text": "2.1.5 Security Administrator function: QIBM_DB_SECADM", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.72735595703125, 208.98062133789062, 538.3322143554688, 255.28724670410156], "page": 25, "span": [0, 290], "__ref_s3_data": null}], "text": "The Security Administrator function (QIBM_DB_SECADM) grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.85955810546875, 138.8216094970703, 547.2896118164062, 197.575927734375], "page": 25, "span": [0, 380], "__ref_s3_data": null}], "text": "Only those users with the QIBM_DB_SECADM function can administer and manage RCAC rules. RCAC can be used to prevent even users with *ALLOBJ authority from freely accessing all the data in a protected database. 
These users are excluded from data access unless they are specifically authorized by RCAC. Without granting this authority, the default behavior is to deny authorization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.57452392578125, 111.07710266113281, 392.72161865234375, 123.48632049560547], "page": 25, "span": [0, 38], "__ref_s3_data": null}], "text": "Granting QIBM_DB_SECADM function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.02122497558594, 85.55487823486328, 460.46807861328125, 108.20436096191406], "page": 25, "span": [0, 117], "__ref_s3_data": null}], "text": "Only QSECOFR or a user with *SECADM special authority can grant the QIBM_DB_SECADM function usage to a user or group.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [360.43804931640625, 27.703941345214844, 529.1567993164062, 37.179229736328125], "page": 25, "span": [0, 41], "__ref_s3_data": null}], "text": "Chapter 2. 
Roles and separation of duties", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.2987060546875, 27.93828010559082, 547.2176513671875, 37.731666564941406], "page": 25, "span": [0, 1], "__ref_s3_data": null}], "text": "9", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.13709259033203, 708.0068969726562, 335.4955139160156, 721.5980834960938], "page": 26, "span": [0, 38], "__ref_s3_data": null}], "text": "2.1.6 Change Function Usage CL command", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.92727661132812, 684.818115234375, 547.284423828125, 695.4608764648438], "page": 26, "span": [0, 90], "__ref_s3_data": null}], "text": "The following CL commands can be used to work with, display, or change function usage IDs:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.50738525390625, 667.6542358398438, 301.5174865722656, 678.104248046875], "page": 26, "span": [0, 49], "__ref_s3_data": null}], "text": "GLYPH Work Function Usage ( WRKFCNUSG )", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.541015625, 655.2158203125, 313.39776611328125, 666.201416015625], "page": 26, "span": [0, 51], "__ref_s3_data": null}], "text": "GLYPH Change Function Usage ( CHGFCNUSG )", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.42544555664062, 643.4954223632812, 310.8171081542969, 654.1441040039062], "page": 26, "span": [0, 52], "__ref_s3_data": null}], "text": "GLYPH Display Function Usage ( DSPFCNUSG )", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.18296813964844, 610.2984619140625, 512.5380249023438, 632.472412109375], "page": 26, "span": [0, 126], "__ref_s3_data": null}], "text": "For example, the following CHGFCNUSG command shows granting authorization to user HBEDOYA to administer and manage RCAC rules:", "type": "paragraph", "name": "Text", 
"font": null}, {"prov": [{"bbox": [136.22215270996094, 592.9798583984375, 441.59686279296875, 603.69287109375], "page": 26, "span": [0, 61], "__ref_s3_data": null}], "text": "CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED)", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.92118835449219, 560.307373046875, 544.4754638671875, 573.6224365234375], "page": 26, "span": [0, 72], "__ref_s3_data": null}], "text": "2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.69790649414062, 525.2785034179688, 519.5179443359375, 547.3473510742188], "page": 26, "span": [0, 130], "__ref_s3_data": null}], "text": "The FUNCTION_USAGE view contains function usage configuration details. Table 2-1 describes the columns in the FUNCTION_USAGE view.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 504.11700439453125, 285.07135009765625, 513.564697265625], "page": 26, "span": [0, 29], "__ref_s3_data": null}], "text": "Table 2-1 FUNCTION_USAGE view", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/4"}, {"prov": [{"bbox": [135.81417846679688, 318.2784729003906, 547.2803955078125, 340.825439453125], "page": 26, "span": [0, 112], "__ref_s3_data": null}], "text": "To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.5948028564453, 296.4012145996094, 463.2222900390625, 306.1437072753906], "page": 26, "span": [0, 74], "__ref_s3_data": null}], "text": "Example 2-1 Query to determine who has authority to define and manage RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/5"}, {"prov": [{"bbox": [64.28413391113281, 156.46859741210938, 249.59605407714844, 
172.32984924316406], "page": 26, "span": [0, 24], "__ref_s3_data": null}], "text": "2.2 Separation of duties", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.95748901367188, 81.68995666503906, 547.2234497070312, 140.11083984375], "page": 26, "span": [0, 463], "__ref_s3_data": null}], "text": "Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.75320434570312, 27.93828010559082, 78.4020004272461, 37.570556640625], "page": 26, "span": [0, 2], "__ref_s3_data": null}], "text": "10", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.40293884277344, 27.698881149291992, 334.4214172363281, 37.30914306640625], "page": 26, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.11419677734375, 651.2788696289062, 542.6943359375, 721.3893432617188], "page": 27, "span": [0, 516], "__ref_s3_data": null}], "text": "For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). 
This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9490509033203, 593.2598266601562, 547.303955078125, 639.5579833984375], "page": 27, "span": [0, 285], "__ref_s3_data": null}], "text": "In IBM i 7.2, the QIBM_DB_SECADM function usage grants authorities, revokes authorities, changes ownership, or changes the primary group without giving access to the object or, in the case of a database table, to the data that is in the table or allowing other operations on the table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1680145263672, 558.7716064453125, 538.6507568359375, 581.5887451171875], "page": 27, "span": [0, 129], "__ref_s3_data": null}], "text": "QIBM_DB_SECADM function usage can be granted only by a user with *SECADM special authority and can be given to a user or a group.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.04412841796875, 513.281005859375, 545.7960205078125, 547.52294921875], "page": 27, "span": [0, 204], "__ref_s3_data": null}], "text": "QIBM_DB_SECADM also is responsible for administering RCAC, which restricts which rows a user is allowed to access in a table and whether a user is allowed to see information in certain columns of a table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.91064453125, 455.2619934082031, 539.80712890625, 501.6045837402344], "page": 27, "span": [0, 285], "__ref_s3_data": null}], "text": "A preferred practice is that the RCAC administrator has the QIBM_DB_SECADM function usage ID, but absolutely no other data privileges. 
The result is that the RCAC administrator can deploy and maintain the RCAC constructs, but cannot grant themselves unauthorized access to data itself.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.74993896484375, 421.3023681640625, 543.067138671875, 443.2637634277344], "page": 27, "span": [0, 136], "__ref_s3_data": null}], "text": "Table 2-2 shows a comparison of the different function usage IDs and *JOBCTL authority to the different CL commands and DB2 for i tools.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 399.966552734375, 392.8609619140625, 409.5616149902344], "page": 27, "span": [0, 78], "__ref_s3_data": null}], "text": "Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/6"}, {"prov": [{"bbox": [354.5693359375, 27.851680755615234, 523.5407104492188, 37.16465377807617], "page": 27, "span": [0, 41], "__ref_s3_data": null}], "text": "Chapter 2. 
Roles and separation of duties", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.497520446777344], "page": 27, "span": [0, 2], "__ref_s3_data": null}], "text": "11", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/7"}, {"prov": [{"bbox": [64.78636169433594, 27.93828010559082, 78.4020004272461, 37.45695877075195], "page": 28, "span": [0, 2], "__ref_s3_data": null}], "text": "12", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.417724609375, 27.724334716796875, 334.4214172363281, 37.30672836303711], "page": 28, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/20"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 29, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 3.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.859619140625], "page": 29, "span": [0, 1], "__ref_s3_data": null}], "text": "3", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 513.0821533203125, 546.0291748046875, 538.6365356445312], "page": 29, "span": [0, 29], "__ref_s3_data": null}], "text": "Row and Column Access Control", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.7967987060547, 453.0984802246094, 536.1522827148438, 475.3611755371094], "page": 29, "span": [0, 133], "__ref_s3_data": null}], "text": "This chapter describes what Row and Column Access Control (RCAC) is, its components, and then illustrates RCAC with a simple example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": 
[136.1170654296875, 431.07891845703125, 347.4121398925781, 441.4175109863281], "page": 29, "span": [0, 49], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.55429077148438, 413.6905822753906, 397.1086730957031, 423.97503662109375], "page": 29, "span": [0, 69], "__ref_s3_data": null}], "text": "GLYPH Explanation of RCAC and the concept of access control", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.65321350097656, 401.5838623046875, 348.458984375, 412.17449951171875], "page": 29, "span": [0, 63], "__ref_s3_data": null}], "text": "GLYPH Special registers and built-in global variables", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7012939453125, 389.8255310058594, 327.0360107421875, 400.37957763671875], "page": 29, "span": [0, 46], "__ref_s3_data": null}], "text": "GLYPH VERIFY_GROUP_FOR_USER function", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.52816772460938, 377.255859375, 454.2698669433594, 388.0998229980469], "page": 29, "span": [0, 86], "__ref_s3_data": null}], "text": "GLYPH Establishing and controlling accessibility by using the RCAC rule text", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.47698974609375, 366.099853515625, 385.8156433105469, 376.4607238769531], "page": 29, "span": [0, 61], "__ref_s3_data": null}], "text": "GLYPH SELECT, INSERT, and UPDATE behavior with RCAC", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5319061279297, 354.047119140625, 270.3636169433594, 364.00299072265625], "page": 29, "span": [0, 39], "__ref_s3_data": null}], "text": "GLYPH Human resources example", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.9825439453125, 27.802661895751953, 257.24334716796875, 37.31828308105469], "page": 29, "span": [0, 48], 
"__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.526145935058594], "page": 29, "span": [0, 2], "__ref_s3_data": null}], "text": "13", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.37876892089844, 702.5391845703125, 518.7757568359375, 718.1989135742188], "page": 30, "span": [0, 57], "__ref_s3_data": null}], "text": "3.1 Explanation of RCAC and the concept of access control", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.11453247070312, 639.6259765625, 547.24267578125, 686.0366821289062], "page": 30, "span": [0, 340], "__ref_s3_data": null}], "text": "RCAC limits data access to those users who have a business \"need to know\". RCAC makes it easy to set up a rich and robust security policy that is based on roles and responsibilities. 
RCAC functionality is made available through the optional, no charge feature called \"IBM Advanced Data Security for i\", also known as option 47 of IBM i 7.2.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0245819091797, 605.769287109375, 505.92376708984375, 627.9368896484375], "page": 30, "span": [0, 141], "__ref_s3_data": null}], "text": "In DB2 for i, RCAC is implemented using two different approaches that address the shortcomings of traditional control methods and mechanisms:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.82949829101562, 589.0179443359375, 227.34536743164062, 599.0405883789062], "page": 30, "span": [0, 31], "__ref_s3_data": null}], "text": "GLYPH Row permissions", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7431182861328, 577.1798095703125, 217.25091552734375, 587.1215209960938], "page": 30, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH Column masks", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.83267211914062, 506.98077392578125, 547.2017822265625, 565.4414672851562], "page": 30, "span": [0, 419], "__ref_s3_data": null}], "text": "Another benefit of RCAC is that no database user is automatically exempt from the control. Users with *ALLOBJ authority can no longer freely access all of the data in the database unless they have the appropriate permission to do so. The ability to manage row permissions and column masks rests with the database security administrator. The RCAC definitions, enablement, and activation are controlled by SQL statements.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1253204345703, 449.1419372558594, 547.2973022460938, 495.46649169921875], "page": 30, "span": [0, 270], "__ref_s3_data": null}], "text": "Row permissions and column masks require virtually no application changes. 
RCAC is based on specific rules that are transparent to existing applications and SQL interfaces. Enforcement of your security policy does not depend on how applications or tools access the data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.33770751953125, 391.0147705078125, 535.672119140625, 437.35345458984375], "page": 30, "span": [0, 331], "__ref_s3_data": null}], "text": "RCAC also facilitates multi-tenancy, which means that several independent customers or business units can share a single database table without being aware of one another. The RCAC row permission ensures each user sees only the rows they are entitled to view because the enforcement is handled by DB2 and not the application logic.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [141.874267578125, 314.53131103515625, 541.2496337890625, 373.4546813964844], "page": 30, "span": [0, 438], "__ref_s3_data": null}], "text": "Label-based access control (LBAC): RCAC and LBAC are not the same thing. LBAC is a security model that is primarily intended for government applications. LBAC requires that data and users be classified with a fixed set of rules that are implemented. RCAC is a general-purpose security model that is primarily intended for commercial customers. 
You can use RCAC to create your own security rules, which in turn allows for more flexibility.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.27526092529297, 277.1085205078125, 383.4799499511719, 290.117431640625], "page": 30, "span": [0, 48], "__ref_s3_data": null}], "text": "3.1.1 Row permission and column mask definitions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.13406372070312, 253.6727752685547, 423.5354309082031, 264.1443786621094], "page": 30, "span": [0, 62], "__ref_s3_data": null}], "text": "The following sections define row permission and column masks.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.67124938964844, 226.04348754882812, 229.260009765625, 238.01478576660156], "page": 30, "span": [0, 14], "__ref_s3_data": null}], "text": "Row permission", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.80856323242188, 189.06199645996094, 544.5714721679688, 223.27346801757812], "page": 30, "span": [0, 261], "__ref_s3_data": null}], "text": "A row permission is a database object that manifests a row access control rule for a specific table. It is essentially a search condition that describes which rows you can access. 
For example, a manager can see only the rows that represent his or her employees.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.69788360595703, 27.93828010559082, 78.4020004272461, 37.5636100769043], "page": 30, "span": [0, 2], "__ref_s3_data": null}], "text": "14", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.36235809326172, 27.683774948120117, 334.4214172363281, 37.30181884765625], "page": 30, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.776123046875, 699.2013549804688, 528.7305908203125, 721.4013061523438], "page": 31, "span": [0, 135], "__ref_s3_data": null}], "text": "The SQL CREATE PERMISSION statement that is shown in Figure 3-1 is used to define and initially enable or disable the row access rules.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [136.0135498046875, 369.07928466796875, 342.5756530761719, 378.6520080566406], "page": 31, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 3-1 CREATE PERMISSION SQL statement", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/21"}, {"prov": [{"bbox": [136.67723083496094, 340.95599365234375, 215.9995574951172, 352.8724060058594], "page": 31, "span": [0, 11], "__ref_s3_data": null}], "text": "Column mask", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.825439453125, 291.6988525390625, 542.7664794921875, 338.13787841796875], "page": 31, "span": [0, 297], "__ref_s3_data": null}], "text": "A column mask is a database object that manifests a column value access control rule for a specific column in a specific table. It uses a CASE expression that describes what you see when you access the column. 
For example, a teller can see only the last four digits of a tax identification number.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [344.59918212890625, 27.955989837646484, 523.6016235351562, 37.184593200683594], "page": 31, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.46247100830078], "page": 31, "span": [0, 2], "__ref_s3_data": null}], "text": "15", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.6975326538086, 27.93828010559082, 78.4020004272461, 37.4251594543457], "page": 32, "span": [0, 2], "__ref_s3_data": null}], "text": "16", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41210174560547, 27.731887817382812, 334.4214172363281, 37.32145690917969], "page": 32, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.9098358154297, 687.2786865234375, 546.5693359375, 721.3076171875], "page": 32, "span": [0, 221], "__ref_s3_data": null}], "text": "Column masks replace the need to create and use views to implement access control. 
The SQL CREATE MASK statement that is shown in Figure 3-2 is used to define and initially enable or disable the column value access rules.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.08889770507812, 365.8803405761719, 311.9504089355469, 375.2521667480469], "page": 32, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-2 CREATE MASK SQL statement", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/22"}, {"prov": [{"bbox": [64.18714141845703, 332.8692932128906, 286.7970275878906, 346.2061767578125], "page": 32, "span": [0, 34], "__ref_s3_data": null}], "text": "3.1.2 Enabling and activating RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.41273498535156, 285.53582763671875, 547.1828002929688, 320.0932312011719], "page": 32, "span": [0, 184], "__ref_s3_data": null}], "text": "You can enable, disable, or regenerate row permissions and column masks by using the SQL ALTER PERMISSION statement and the SQL ALTER MASK statement, as shown in Figure 3-3 on page 17.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.00491333007812, 168.1006622314453, 547.2286376953125, 274.0113830566406], "page": 32, "span": [0, 734], "__ref_s3_data": null}], "text": "Enabling and disabling effectively turns on or off the logic that is contained in the row permission or column mask. Regenerating causes the row permission or column mask to be regenerated. The row permission definition in the catalog is used and existing dependencies and authorizations, if any, are retained. The row permission definition is reevaluated as though the row permission were being created. Any user-defined functions (UDFs) that are referenced in the row permission must be resolved to the same secure UDFs as were resolved during the original row permission or column mask creation. 
The regenerate option can be used to ensure that the RCAC logic is intact and still valid before any user attempts to access the table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.18338012695312, 127.9096450805664, 531.8515625, 149.98568725585938], "page": 32, "span": [0, 120], "__ref_s3_data": null}], "text": "Note: An exclusive lock is required on the table object to perform the alter operation. All open cursors must be closed.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.984130859375, 432.4007263183594, 415.3210144042969, 442.02081298828125], "page": 33, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/23"}, {"prov": [{"bbox": [135.7842254638672, 357.56317138671875, 547.2994995117188, 416.1618347167969], "page": 33, "span": [0, 433], "__ref_s3_data": null}], "text": "You can activate and deactivate RCAC for new or existing tables by using the SQL ALTER TABLE statement (Figure 3-4). The ACTIVATE or DEACTIVATE clause must be the option that is specified in the statement. No other alterations are permitted at the same time. The activating and deactivating effectively turns on or off all RCAC processing for the table. Only enabled row permissions and column masks take effect when activating RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.0438995361328, 317.40087890625, 531.8515625, 339.8829650878906], "page": 33, "span": [0, 120], "__ref_s3_data": null}], "text": "Note: An exclusive lock is required on the table object to perform the alter operation. 
All open cursors must be closed.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.43646240234375, 57.76702117919922, 306.6454772949219, 67.19706726074219], "page": 33, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-4 ALTER TABLE SQL statement", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/24"}, {"prov": [{"bbox": [344.5545349121094, 27.893524169921875, 523.6016235351562, 37.2478141784668], "page": 33, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.54359436035156], "page": 33, "span": [0, 2], "__ref_s3_data": null}], "text": "17", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.6755142211914, 27.93828010559082, 78.4020004272461, 37.471988677978516], "page": 34, "span": [0, 2], "__ref_s3_data": null}], "text": "18", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.40727233886719, 27.745635986328125, 334.4214172363281, 37.298118591308594], "page": 34, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.6855926513672, 638.6604614257812, 547.273681640625, 721.4677124023438], "page": 34, "span": [0, 566], "__ref_s3_data": null}], "text": "When row access control is activated on a table, a default permission is established for that table. The name of this permission is QIBM_DEFAULT_ _. This default permission contains a simple piece of logic (0=1) which is never true. The default permission effectively denies access to every user unless there is a permission defined that allows access explicitly. 
If row access control is activated on a table, and there is no permission that is defined, no one has permission to any rows. All queries against the table produce an empty set.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.01759338378906, 580.7723388671875, 547.2686157226562, 627.36962890625], "page": 34, "span": [0, 378], "__ref_s3_data": null}], "text": "It is possible to define, create, and enable multiple permissions on a table. Logically, all of the permissions are ORed together to form a comprehensive test of the user's ability to access the data. A column can have only one mask that is defined over it. From an implementation standpoint, it does not matter if you create the column masks first or the row permissions first.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.60574340820312, 541.2984619140625, 537.4868774414062, 563.368896484375], "page": 34, "span": [0, 97], "__ref_s3_data": null}], "text": "Note: If a user does not have permission to access the row, the column mask logic is not invoked.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.3016128540039, 492.1618347167969, 438.7572021484375, 508.3872985839844], "page": 34, "span": [0, 51], "__ref_s3_data": null}], "text": "3.2 Special registers and built-in global variables", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.88925170898438, 454.2984619140625, 525.5018920898438, 476.74810791015625], "page": 34, "span": [0, 105], "__ref_s3_data": null}], "text": "This section describes how you can use special registers and built-in global variables to implement RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.38701629638672, 420.9353942871094, 204.5852813720703, 434.2147216796875], "page": 34, "span": [0, 23], "__ref_s3_data": null}], "text": "3.2.1 Special registers", "type": "subtitle-level-1", "name": "Section-header", "font": null}, 
{"prov": [{"bbox": [136.0377197265625, 373.72930908203125, 547.1063842773438, 407.9320983886719], "page": 34, "span": [0, 261], "__ref_s3_data": null}], "text": "A special register is a storage area that is defined for an application process by DB2 and is used to store information that can be referenced in SQL statements. A reference to a special register is a reference to a value that is provided by the current server.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.25625610351562, 316.2984619140625, 544.5763549804688, 362.75390625], "page": 34, "span": [0, 323], "__ref_s3_data": null}], "text": "IBM DB2 for i supports four different special registers that can be used to identify what user profiles are relevant to determining object authorities in the current connection to the server. SQL uses the term runtime authorization ID , which corresponds to a user profile on DB2 for i. Here are the four special registers:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.56118774414062, 274.8968200683594, 546.981201171875, 309.5057067871094], "page": 34, "span": [0, 239], "__ref_s3_data": null}], "text": "GLYPH USER is the runtime user profile that determines the object authorities for the current connection to the server. It has a data type of VARCHAR(18). 
This value can be changed by the SQL statement SET SESSION AUTHORIZATION .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4746551513672, 246.278564453125, 522.5455932617188, 268.92169189453125], "page": 34, "span": [0, 110], "__ref_s3_data": null}], "text": "GLYPH SESSION_USER is the same as the USER register, except that it has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.55592346191406, 157.2052459716797, 547.2483520507812, 239.66249084472656], "page": 34, "span": [0, 594], "__ref_s3_data": null}], "text": "GLYPH CURRENT USER was added in IBM i 7.2 and is similar to the USER register, but it has one important difference in that it also reports adopted authority. High-level language programs and SQL routines such as functions, procedures, and triggers can optionally be created to run using either the caller's or the owner's user profile to determine data authorities. For example, an SQL procedure can be created to run under the owner's authority by specifying SET OPTION USRPRF=*OWNER . This special register can also be referenced as CURRENT_USER. It has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.612060546875, 91.61019897460938, 547.2650756835938, 150.18948364257812], "page": 34, "span": [0, 422], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_USER is the user profile that initiates the connection to the server. It is not used by RCAC, but is included here for completeness. Many jobs, including the QZDASOINIT prestarted jobs, initially connect to the server with a default user profile and then change to use some other user profile. SYSTEM_USER reports this value, typically QUSER for a QZDASOINIT job. 
It has a data type of VARCHAR(128).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0863800048828, 57.777366638183594, 547.2014770507812, 80.15410614013672], "page": 34, "span": [0, 121], "__ref_s3_data": null}], "text": "In addition to these four special registers, any of the DB2 special registers can be referenced as part of the rule text.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.83859252929688, 710.943603515625, 412.20758056640625, 721.3551635742188], "page": 35, "span": [0, 62], "__ref_s3_data": null}], "text": "Table 3-1 summarizes these special registers and their values.", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.8000030517578, 690.0230102539062, 373.028076171875, 699.3997802734375], "page": 35, "span": [0, 58], "__ref_s3_data": null}], "text": "Table 3-1 Special registers and their corresponding values", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/8"}, {"prov": [{"bbox": [136.2273712158203, 556.2984619140625, 538.493896484375, 578.5513305664062], "page": 35, "span": [0, 97], "__ref_s3_data": null}], "text": "Figure 3-5 shows the difference in the special register values when an adopted authority is used:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.69070434570312, 538.869384765625, 411.36138916015625, 549.4674682617188], "page": 35, "span": [0, 75], "__ref_s3_data": null}], "text": "GLYPH A user connects to the server using the user profile ALICE.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.39437866210938, 522.23193359375, 453.2580871582031, 532.7328491210938], "page": 35, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH USER and CURRENT USER initially have the same value of ALICE.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.37547302246094, 492.69061279296875, 
541.4498291015625, 515.4265747070312], "page": 35, "span": [0, 160], "__ref_s3_data": null}], "text": "GLYPH ALICE calls an SQL procedure that is named proc1, which is owned by user profile JOE and was created to adopt JOE's authority when it is called.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.46484375, 451.6559143066406, 547.2167358398438, 486.2220153808594], "page": 35, "span": [0, 253], "__ref_s3_data": null}], "text": "GLYPH While the procedure is running, the special register USER still contains the value of ALICE because it excludes any adopted authority. The special register CURRENT USER contains the value of JOE because it includes any adopted authority.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.61715698242188, 423.015869140625, 547.3540649414062, 445.505615234375], "page": 35, "span": [0, 133], "__ref_s3_data": null}], "text": "GLYPH When proc1 ends, the session reverts to its original state with both USER and CURRENT USER having the value of ALICE.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.28187561035156, 186.33480834960938, 342.36993408203125, 195.70782470703125], "page": 35, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 3-5 Special registers and adopted authority", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/25"}, {"prov": [{"bbox": [64.09709930419922, 154.2414093017578, 247.02536010742188, 167.46414184570312], "page": 35, "span": [0, 31], "__ref_s3_data": null}], "text": "3.2.2 Built-in global variables", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.147705078125, 119.0784683227539, 518.0011596679688, 141.2449493408203], "page": 35, "span": [0, 161], "__ref_s3_data": null}], "text": "Built-in global variables are provided with the database manager and are used in SQL statements to retrieve scalar values that 
are associated with the variables.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.06741333007812, 72.93258666992188, 532.3385009765625, 107.38182830810547], "page": 35, "span": [0, 233], "__ref_s3_data": null}], "text": "IBM DB2 for i supports nine different built-in global variables that are read only and maintained by the system. These global variables can be used to identify attributes of the database connection and used as part of the RCAC logic.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [344.6591491699219, 27.951305389404297, 523.6016235351562, 37.228702545166016], "page": 35, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.58589172363281], "page": 35, "span": [0, 2], "__ref_s3_data": null}], "text": "19", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.01134490966797, 27.93828010559082, 78.4020004272461, 37.716331481933594], "page": 36, "span": [0, 2], "__ref_s3_data": null}], "text": "20", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.40630340576172, 27.696765899658203, 334.4214172363281, 37.33338928222656], "page": 36, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.99867248535156, 711.0256958007812, 342.5477294921875, 721.4012451171875], "page": 36, "span": [0, 51], "__ref_s3_data": null}], "text": "Table 3-2 lists the nine built-in global variables.", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.80000305175781, 690.177001953125, 201.90721130371094, 699.1590576171875], "page": 36, "span": [0, 35], "__ref_s3_data": null}], "text": "Table 3-2 Built-in global 
variables", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/9"}, {"prov": [{"bbox": [64.28919219970703, 455.0362854003906, 384.3638916015625, 471.1539611816406], "page": 36, "span": [0, 34], "__ref_s3_data": null}], "text": "3.3 VERIFY_GROUP_FOR_USER function", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.70318603515625, 355.5380859375, 547.2347412109375, 438.36163330078125], "page": 36, "span": [0, 576], "__ref_s3_data": null}], "text": "The VERIFY_GROUP_FOR_USER function was added in IBM i 7.2. Although it is primarily intended for use with RCAC permissions and masks, it can be used in other SQL statements. The first parameter must be one of these three special registers: SESSION_USER, USER, or CURRENT_USER. The second and subsequent parameters are a list of user or group profiles. Each of these values must be 1 - 10 characters in length. These values are not validated for their existence, which means that you can specify the names of user profiles that do not exist without receiving any kind of error.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1623077392578, 310.2999572753906, 547.2573852539062, 344.6438293457031], "page": 36, "span": [0, 235], "__ref_s3_data": null}], "text": "If a special register value is in the list of user profiles or it is a member of a group profile included in the list, the function returns a long integer value of 1. Otherwise, it returns a value of 0. 
It never returns the null value.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.18377685546875, 288.2803955078125, 458.44525146484375, 298.98516845703125], "page": 36, "span": [0, 63], "__ref_s3_data": null}], "text": "Here is an example of using the VERIFY_GROUP_FOR_USER function:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80001831054688, 271.2185363769531, 406.0775146484375, 281.7854919433594], "page": 36, "span": [0, 57], "__ref_s3_data": null}], "text": "1. There are user profiles for MGR, JANE, JUDY, and TONY.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.9613494873047, 253.6958770751953, 396.9881591796875, 264.2366027832031], "page": 36, "span": [0, 58], "__ref_s3_data": null}], "text": "2. The user profile JANE specifies a group profile of MGR.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.15640258789062, 225.28138732910156, 536.568603515625, 247.44735717773438], "page": 36, "span": [0, 127], "__ref_s3_data": null}], "text": "3. 
If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.25143432617188, 149.68975830078125, 451.01605224609375, 217.97032165527344], "page": 36, "span": [0, 265], "__ref_s3_data": null}], "text": "VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.63925170898438, 687.0557861328125, 512.90087890625, 721.5410766601562], "page": 37, "span": [0, 74], "__ref_s3_data": null}], "text": "3.4 Establishing and controlling accessibility by using the RCAC rule text", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.79978942871094, 636.2785034179688, 544.9019775390625, 670.8543701171875], "page": 37, "span": [0, 230], "__ref_s3_data": null}], "text": "When defining a row permission or column mask, the \"magic\" of establishing and controlling accessibility comes from the rule text . The rule text represents the search criteria and logic that is implemented by the database engine.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0677032470703, 578.1064453125, 545.6567993164062, 623.906982421875], "page": 37, "span": [0, 359], "__ref_s3_data": null}], "text": "In the case of a row permission, the rule text is the \"test\" of whether the user can access the row. If the test result is true, the row can be accessed. If the test result is false, the row essentially does not exist for the user. 
From a set-at-a-time perspective, the permission defines which rows can be part of the query result set, and which rows cannot.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2191619873047, 532.300048828125, 537.8551025390625, 566.6549072265625], "page": 37, "span": [0, 196], "__ref_s3_data": null}], "text": "In the case of a column mask, the rule text is both the test of whether the user can see the actual column value, and it is the masking logic if the user cannot have access to actual column value.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.39344787597656, 498.2806396484375, 547.2691040039062, 520.6704711914062], "page": 37, "span": [0, 117], "__ref_s3_data": null}], "text": "For a simple example of implementing row permissions and column masks, see 3.6, \"Human resources example\" on page 22.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1230926513672, 464.1593933105469, 545.9476928710938, 485.88037109375], "page": 37, "span": [0, 185], "__ref_s3_data": null}], "text": "In general, almost any set-based, relational logic is valid. For the row permission, the search condition follows the same rules that are used by the search condition in a WHERE clause.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1824188232422, 382.2794494628906, 547.2494506835938, 452.6309509277344], "page": 37, "span": [0, 504], "__ref_s3_data": null}], "text": "For the column mask, the logic follows the same rules as the CASE expression. The result data type, length, null attribute, and CCSID of the CASE expression must be compatible with the data type of the column. If the column does not allow the null value, the result of the CASE expression cannot be the NULL value. 
The application or interface making the data access request is expecting that all of the column attributes and values are consistent with the original definition, regardless of any masking.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.42941284179688, 348.2829895019531, 542.0492553710938, 370.6573181152344], "page": 37, "span": [0, 127], "__ref_s3_data": null}], "text": "For more information about what is permitted, see the \"Database programming\" topic of the IBM i 7.2 Knowledge Center, found at:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.240966796875, 319.5921936035156, 546.5352172851562, 341.3039855957031], "page": 37, "span": [0, 86], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?lang =en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0011749267578, 237.2847442626953, 547.0272216796875, 307.4244079589844], "page": 37, "span": [0, 546], "__ref_s3_data": null}], "text": "One of the first tasks in either the row permission or the column mask logic is to determine who the user is, and whether they have access to the data. Elegant methods to establish the identity and attributes of the user can be employed by using the special registers, global variables, and the VERIFY function. After the user's identity is established, it is a simple matter of allowing or disallowing access by using true or false testing. The examples that are included in this paper demonstrate some of the more common and obvious techniques.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.90562438964844, 155.11756896972656, 547.3748779296875, 225.27194213867188], "page": 37, "span": [0, 538], "__ref_s3_data": null}], "text": "More sophisticated methods can employ existential, day of year / time of day, and relational comparisons with set operations. 
For example, you can use a date master or date dimension table to determine whether the current date is a normal business day. If the current date is a valid business day, then access is allowed. If the current date is not a business day (for example a weekend day or holiday), access is denied. This test can be accomplished by performing a lookup using a subquery, such as the one that is shown in Example 3-1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.77896118164062, 133.42030334472656, 355.3341369628906, 143.4773406982422], "page": 37, "span": [0, 53], "__ref_s3_data": null}], "text": "Example 3-1 Subquery that is used as part of the rule", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/10"}, {"prov": [{"bbox": [344.5984191894531, 27.918827056884766, 523.6016235351562, 37.22893142700195], "page": 37, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.4708862304688, 27.93828010559082, 547.2591552734375, 37.50567626953125], "page": 37, "span": [0, 2], "__ref_s3_data": null}], "text": "21", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.00093841552734, 27.93828010559082, 78.4020004272461, 37.64419937133789], "page": 38, "span": [0, 2], "__ref_s3_data": null}], "text": "22", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.40680694580078, 27.702280044555664, 334.4214172363281, 37.302345275878906], "page": 38, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.98097229003906, 614.8680419921875, 547.2915649414062, 721.5286254882812], "page": 38, "span": [0, 763], "__ref_s3_data": null}], "text": "Given that joins and subqueries can be used to 
perform set-based operations against existing data that is housed in other objects, almost any relational test can be constructed. If the data in the objects is manipulated over time, the RCAC test logic (and user query results) can be changed without modifying the actual row permission or column mask. This includes moving a user from one group to another or changing a column value that is used to allow or disallow access. For example, if Saturday is now a valid business day, only the BUSINESS_DAY value in the DATE_MASTER must be updated, not the permission logic. This technique can potentially avoid downtime because of the exclusive lock that is needed on the table when adding or changing RCAC definitions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.369140625, 572.0363159179688, 486.3243713378906, 587.82958984375], "page": 38, "span": [0, 49], "__ref_s3_data": null}], "text": "3.5 SELECT, INSERT, and UPDATE behavior with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.89967346191406, 473.217529296875, 547.2974243164062, 555.4274291992188], "page": 38, "span": [0, 566], "__ref_s3_data": null}], "text": "RCAC provides a database-centric approach to determining which rows can be accessed and what column values can be seen by a specific user. Given that the control is handled by DB2 internally, every data manipulation statement is under the influence of RCAC, with no exceptions. When accessing the table, the SELECT statements, searched UPDATE statements, and searched DELETE statements implicitly and transparently contain the row permission and the column mask rule text. 
This means that the data set can be logically restricted and reduced on a user by user basis.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.10336303710938, 427.2479248046875, 547.1958618164062, 461.4225158691406], "page": 38, "span": [0, 275], "__ref_s3_data": null}], "text": "Furthermore, DB2 prevents an INSERT statement from inserting a row or an UPDATE statement from modifying a row such that the current user cannot be permitted to access it. You cannot create a situation in which the data you inserted or changed is no longer accessible to you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.00917053222656, 393.11602783203125, 547.2606811523438, 415.4470520019531], "page": 38, "span": [0, 138], "__ref_s3_data": null}], "text": "For more information and considerations about data movement in an RCAC environment, see Chapter 6, \"Additional considerations\" on page 85.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.0465545654297, 340.999267578125, 541.2198486328125, 375.3176574707031], "page": 38, "span": [0, 213], "__ref_s3_data": null}], "text": "Note: DB2 does not provide any indication back to the user that the data set requested was restricted or reduced by RCAC. This is by design, as it helps minimize any changes to the applications accessing the data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.39986419677734, 293.036376953125, 298.8533935546875, 308.852783203125], "page": 38, "span": [0, 27], "__ref_s3_data": null}], "text": "3.6 Human resources example", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.86647033691406, 206.2593536376953, 542.829345703125, 276.0174255371094], "page": 38, "span": [0, 436], "__ref_s3_data": null}], "text": "This section illustrates with a simple example the usage of RCAC on a typical Human Resources application (schema). 
In this sample Human Resources schema, there is an important table that is called EMPLOYEES that contains all the information that is related to the employees of the company. Among the information that normally is stored in the EMPLOYEES table, there is some sensitive information that must be hidden from certain users:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.812744140625, 189.279541015625, 235.38601684570312, 199.46461486816406], "page": 38, "span": [0, 34], "__ref_s3_data": null}], "text": "GLYPH Tax_Id information", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6238250732422, 177.03131103515625, 470.03765869140625, 187.34808349609375], "page": 38, "span": [0, 87], "__ref_s3_data": null}], "text": "GLYPH YEAR of the birth date of the employee (hiding the age of the employee)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.17919921875, 154.9170379638672, 375.29803466796875, 165.3710174560547], "page": 38, "span": [0, 57], "__ref_s3_data": null}], "text": "In this example, there are four different types of users:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6767120361328, 138.28036499023438, 200.1146697998047, 148.322509765625], "page": 38, "span": [0, 25], "__ref_s3_data": null}], "text": "GLYPH Employees", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.50352478027344, 125.92362976074219, 195.63865661621094, 136.71217346191406], "page": 38, "span": [0, 24], "__ref_s3_data": null}], "text": "GLYPH Managers", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.46554565429688, 114.28074645996094, 276.60760498046875, 124.81513977050781], "page": 38, "span": [0, 39], "__ref_s3_data": null}], "text": "GLYPH Human Resources Manager", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.2389678955078, 90.28113555908203, 539.58447265625, 
112.66990661621094], "page": 38, "span": [0, 144], "__ref_s3_data": null}], "text": "GLYPH Consultant/IT Database Engineer (In this example, this person is an external consultant that is not an employee of the company.)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7884979248047, 56.261756896972656, 546.5243530273438, 77.96189880371094], "page": 38, "span": [0, 109], "__ref_s3_data": null}], "text": "The following sections describe step-by-step what is needed to be done to implement RCAC in this environment.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.26187133789062, 708.1668701171875, 500.5502014160156, 721.559814453125], "page": 39, "span": [0, 65], "__ref_s3_data": null}], "text": "3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.6376190185547, 660.9580688476562, 547.2426147460938, 695.4973754882812], "page": 39, "span": [0, 228], "__ref_s3_data": null}], "text": "The consultant must have authority to implement RCAC, so you must use one of the function IDs that are provided in DB2 for i (see 2.1.5, \"Security Administrator function: QIBM_DB_SECADM\" on page 9). Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 619.9730834960938, 510.97723388671875, 654.0428466796875], "page": 39, "span": [0, 161], "__ref_s3_data": null}], "text": "1. Run the Change Functional Usage ( CHGFCNUSG ) CL commands that are shown in Example 3-2. 
These commands must be run by someone that has the *SECOFR authority.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 598.985107421875, 358.47119140625, 608.8162841796875], "page": 39, "span": [0, 50], "__ref_s3_data": null}], "text": "Example 3-2 Function ID required to implement RCAC", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.4434051513672, 568.8560791015625, 441.59588623046875, 591.7393798828125], "page": 39, "span": [0, 121], "__ref_s3_data": null}], "text": "CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(HBEDOYA) USAGE(*ALLOWED) CHGFCNUSG FCNID(QIBM_DB_SECADM) USER(MCAIN) USAGE(*ALLOWED)", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.14793395996094, 533.0126953125, 547.2882080078125, 555.4136962890625], "page": 39, "span": [0, 163], "__ref_s3_data": null}], "text": "2. There is a way to discover which user profiles have authorization to implement RCAC. This can be done by running the SQL statement that is shown in Example 3-3.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.76101684570312, 511.7995300292969, 460.1641540527344, 521.465576171875], "page": 39, "span": [0, 77], "__ref_s3_data": null}], "text": "Example 3-3 Verifying what user profiles have authorization to implement RCAC", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.20632934570312, 420.8342590332031, 346.6770935058594, 505.3314208984375], "page": 39, "span": [0, 129], "__ref_s3_data": null}], "text": "SELECT function_id, user_name, usage, user_type FROM qsys2.function_usage WHERE function_id ='QIBM_DB_SECADM' ORDER BY user_name;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.12759399414062, 386.04730224609375, 545.5682983398438, 408.11676025390625], "page": 39, "span": [0, 151], "__ref_s3_data": null}], "text": "3. The result of the SQL statement is shown in Figure 3-6. 
In this example, either MCAIN or HBEDOYA can implement RCAC in the Human Resources database.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.321044921875, 314.7570495605469, 307.5635681152344, 324.75347900390625], "page": 39, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 3-6 Result of the function ID query", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.15365600585938, 282.8797912597656, 418.56524658203125, 296.369140625], "page": 39, "span": [0, 59], "__ref_s3_data": null}], "text": "3.6.2 Creating group profiles for the users and their roles", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.047607421875, 247.4474334716797, 532.9351806640625, 269.863525390625], "page": 39, "span": [0, 154], "__ref_s3_data": null}], "text": "Assuming that all the employees have a valid user profile, the next step is to create group profiles to group the employees. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 230.03945922851562, 357.9049987792969, 240.60972595214844], "page": 39, "span": [0, 51], "__ref_s3_data": null}], "text": "1. 
In this example, there are three group profiles:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.1498565673828, 213.60145568847656, 313.8529357910156, 223.9517364501953], "page": 39, "span": [0, 30], "__ref_s3_data": null}], "text": "-HR (Human Resource personnel)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.3465118408203, 201.096923828125, 242.8311767578125, 212.0452117919922], "page": 39, "span": [0, 15], "__ref_s3_data": null}], "text": "-MGR (Managers)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.22793579101562, 189.64846801757812, 245.60702514648438, 199.87611389160156], "page": 39, "span": [0, 16], "__ref_s3_data": null}], "text": "-EMP (Employees)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.51315307617188, 160.13287353515625, 547.29541015625, 182.80564880371094], "page": 39, "span": [0, 174], "__ref_s3_data": null}], "text": "These are created by creating user profiles with no password. 
Example 3-4 shows the Create User Profile ( CRTUSRPRF ) CL commands that you use to create these group profiles.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 138.42959594726562, 286.0830383300781, 148.75363159179688], "page": 39, "span": [0, 35], "__ref_s3_data": null}], "text": "Example 3-4 Creating group profiles", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.74122619628906, 93.17837524414062, 547.5665893554688, 138.2897186279297], "page": 39, "span": [0, 174], "__ref_s3_data": null}], "text": "CRTUSRPRF USRPRF(EMP) PASSWORD() TEXT('Employees Group') CRTUSRPRF USRPRF(MGR) PASSWORD() TEXT('Managers Group') CRTUSRPRF USRPRF(HR) PASSWORD() TEXT('Human Resources Group')", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [344.6204833984375, 27.91693878173828, 523.6016235351562, 37.26707458496094], "page": 39, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5922241210938, 27.93828010559082, 547.2591552734375, 37.411338806152344], "page": 39, "span": [0, 2], "__ref_s3_data": null}], "text": "23", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.0204849243164, 27.93828010559082, 78.4020004272461, 37.76950454711914], "page": 40, "span": [0, 2], "__ref_s3_data": null}], "text": "24", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.37641143798828, 27.63180923461914, 334.4214172363281, 37.37206268310547], "page": 40, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.3994903564453, 674.6657104492188, 547.2675170898438, 721.529296875], "page": 40, "span": [0, 308], "__ref_s3_data": null}], "text": "2. 
You now must assign users to a group profile. Employees go in to the EMP group profile, Managers go into the MGR group profile, and Human Resource employees go into the HR group profile. For simplicity, this example selects one employee (DSSMITH), one manager (TQSPENSER), and one HR analyst (VGLUCCHESS).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [156.58865356445312, 646.8115234375, 533.43896484375, 657.5706176757812], "page": 40, "span": [0, 81], "__ref_s3_data": null}], "text": "Note: Neither of the consultants (MCAIN and HBEDOYA) belong to any group profile.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.1897964477539, 609.5682983398438, 360.1609802246094, 622.9415283203125], "page": 40, "span": [0, 44], "__ref_s3_data": null}], "text": "3.6.3 Demonstrating data access without RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.1055908203125, 574.2984619140625, 540.3065185546875, 596.710693359375], "page": 40, "span": [0, 127], "__ref_s3_data": null}], "text": "Before implementing RCAC, run some simple SQL statements to demonstrate data access without RCAC. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 545.2590942382812, 547.2156982421875, 567.3441162109375], "page": 40, "span": [0, 124], "__ref_s3_data": null}], "text": "1. 
The first SQL statement, which is shown in Example 3-5, basically counts the total number of rows in the EMPLOYEES table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.759765625, 523.2703247070312, 334.7440490722656, 533.2840576171875], "page": 40, "span": [0, 44], "__ref_s3_data": null}], "text": "Example 3-5 Counting the number of employees", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2035675048828, 505.5345458984375, 406.6163635253906, 516.2594604492188], "page": 40, "span": [0, 54], "__ref_s3_data": null}], "text": "SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [150.52401733398438, 469.4679260253906, 545.1071166992188, 492.073974609375], "page": 40, "span": [0, 103], "__ref_s3_data": null}], "text": "The result of this query is shown in Figure 3-7, which is the total number of employees of the company.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.25228881835938, 407.0002746582031, 272.8937683105469, 416.7295227050781], "page": 40, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 3-7 Number of employees", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/26"}, {"prov": [{"bbox": [136.22938537597656, 368.23065185546875, 547.2517700195312, 390.6070861816406], "page": 40, "span": [0, 160], "__ref_s3_data": null}], "text": "2. Run a second SQL statement (shown in Example 3-6) that lists the employees. 
If you have read access to the table, you see all the rows no matter who you are.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 347.4570007324219, 369.31500244140625, 355.7820129394531], "page": 40, "span": [0, 55], "__ref_s3_data": null}], "text": "Example 3-6 Displaying the information of the Employees", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 245.0392303466797, 286.67803955078125, 340.6308898925781], "page": 40, "span": [0, 124], "__ref_s3_data": null}], "text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [150.65087890625, 710.772705078125, 356.252197265625, 721.2792358398438], "page": 41, "span": [0, 48], "__ref_s3_data": null}], "text": "The result of this query is shown in Figure 3-8.", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.38050079345703, 311.7666931152344, 276.68267822265625, 321.1317138671875], "page": 41, "span": [0, 49], "__ref_s3_data": null}], "text": "Figure 3-8 List of employees without RCAC enabled", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.3528823852539, 278.95111083984375, 339.9589538574219, 292.13372802734375], "page": 41, "span": [0, 43], "__ref_s3_data": null}], "text": "3.6.4 Defining and creating row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.41700744628906, 255.6973419189453, 484.33428955078125, 266.0401916503906], "page": 41, "span": [0, 72], "__ref_s3_data": null}], "text": "Implement RCAC on the EMPLOYEES table by completing the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80001831054688, 226.56398010253906, 519.3287963867188, 249.3506622314453], "page": 41, "span": [0, 104], "__ref_s3_data": 
null}], "text": "1. Start by defining a row permission. In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.22027587890625, 209.7350616455078, 392.5151062011719, 219.887451171875], "page": 41, "span": [0, 48], "__ref_s3_data": null}], "text": "-Human Resources employees can see all the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.24195861816406, 197.71392822265625, 484.94476318359375, 207.83270263671875], "page": 41, "span": [0, 70], "__ref_s3_data": null}], "text": "-Managers can see only information for the employees that they manage.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.22813415527344, 185.97039794921875, 371.5732421875, 195.80975341796875], "page": 41, "span": [0, 46], "__ref_s3_data": null}], "text": "-Employees can see only their own information.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.13531494140625, 173.5364532470703, 415.18304443359375, 183.9356689453125], "page": 41, "span": [0, 58], "__ref_s3_data": null}], "text": "-Consultants are not allowed to see any rows in the table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [344.7210693359375, 27.904325485229492, 523.6016235351562, 37.22553253173828], "page": 41, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5884399414062, 27.93828010559082, 547.2591552734375, 37.458621978759766], "page": 41, "span": [0, 2], "__ref_s3_data": null}], "text": "25", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.911659240722656, 27.93828010559082, 78.4020004272461, 37.578487396240234], "page": 42, "span": [0, 2], "__ref_s3_data": null}], "text": "26", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.68279266357422, 334.4214172363281, 37.28973388671875], "page": 42, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [150.37896728515625, 710.8544311523438, 538.560302734375, 721.4290161132812], "page": 42, "span": [0, 85], "__ref_s3_data": null}], "text": "To implement this row permission, run the SQL statement that is shown in Example 3-7.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.6933135986328, 689.595703125, 383.918701171875, 699.4521484375], "page": 42, "span": [0, 56], "__ref_s3_data": null}], "text": "Example 3-7 Creating a permission for the EMPLOYEE table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [134.9765625, 547.642822265625, 547.2913818359375, 688.2454833984375], "page": 42, "span": [0, 438], "__ref_s3_data": null}], "text": "CREATE PERMISSION HR_SCHEMA.PERMISSION1_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR ROWS WHERE ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND ( EMPLOYEES . MANAGER_OF_EMPLOYEE = SESSION_USER OR EMPLOYEES . USER_ID = SESSION_USER ) ) OR ( VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 AND EMPLOYEES . 
USER_ID = SESSION_USER ) ENFORCED FOR ALL ACCESS ENABLE ;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.09703063964844, 504.1363220214844, 539.8582153320312, 538.7603759765625], "page": 42, "span": [0, 212], "__ref_s3_data": null}], "text": "2. Look at the definition of the table and see the permissions, as shown in Figure 3-9. QIBM_DEFAULT_EMPLOYEE_HR_SCHEMA is the default permission, as described in 3.1.2, \"Enabling and activating RCAC\" on page 16.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.4061508178711, 291.1373596191406, 331.3226013183594, 300.8897705078125], "page": 42, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 3-9 Row permissions that are shown in System i Navigator", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/27"}, {"prov": [{"bbox": [64.20075988769531, 258.3492431640625, 327.4058837890625, 271.5242919921875], "page": 42, "span": [0, 40], "__ref_s3_data": null}], "text": "3.6.5 Defining and creating column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.30203247070312, 222.73190307617188, 526.414306640625, 245.54981994628906], "page": 42, "span": [0, 96], "__ref_s3_data": null}], "text": "Define the different masks for the columns that are sensitive by completing the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 194.2399139404297, 538.78564453125, 216.82652282714844], "page": 42, "span": [0, 104], "__ref_s3_data": null}], "text": "1. Start with the DAY_OF_BIRTH column. 
In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.34336853027344, 177.2699432373047, 467.65625, 187.3153839111328], "page": 42, "span": [0, 67], "__ref_s3_data": null}], "text": "-Human Resources can see the entire date of birth of the employees.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.10040283203125, 160.46939086914062, 375.3867492675781, 170.65682983398438], "page": 42, "span": [0, 48], "__ref_s3_data": null}], "text": "-Employees can see only their own date of birth.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.22607421875, 142.966064453125, 547.2565307617188, 153.288818359375], "page": 42, "span": [0, 83], "__ref_s3_data": null}], "text": "-Managers can see the date of birth of their employees masked with YEAR being 9999.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.5434112548828, 126.48641204833984, 530.0606689453125, 136.5352020263672], "page": 42, "span": [0, 82], "__ref_s3_data": null}], "text": "To implement this column mask, run the SQL statement that is shown in Example 3-8.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.55294799804688, 105.13160705566406, 404.0565185546875, 114.82978820800781], "page": 42, "span": [0, 58], "__ref_s3_data": null}], "text": "Example 3-8 Creation of a mask on the DATE_OF_BIRTH column", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/11"}, {"prov": [{"bbox": [136.0765838623047, 529.9927368164062, 523.3837280273438, 720.341552734375], "page": 43, "span": [0, 449], "__ref_s3_data": null}], "text": "RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . 
DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.99525451660156, 495.1485595703125, 547.2122192382812, 517.6535034179688], "page": 43, "span": [0, 136], "__ref_s3_data": null}], "text": "2. The other column to mask in this example is the TAX_ID information. In this example, the rules to enforce include the following ones:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.24124145507812, 478.3014831542969, 469.1528015136719, 488.7835998535156], "page": 43, "span": [0, 62], "__ref_s3_data": null}], "text": "-Human Resources can see the unmasked TAX_ID of the employees.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.32850646972656, 461.2357177734375, 403.98541259765625, 471.3717956542969], "page": 43, "span": [0, 50], "__ref_s3_data": null}], "text": "-Employees can see only their own unmasked TAX_ID.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.30641174316406, 432.28228759765625, 545.16845703125, 454.51611328125], "page": 43, "span": [0, 129], "__ref_s3_data": null}], "text": "-Managers see a masked version of TAX_ID with the first five characters replaced with the X character (for example, XXX-XX-1234).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.07168579101562, 414.8314208984375, 529.463623046875, 425.31011962890625], "page": 43, "span": [0, 77], "__ref_s3_data": null}], "text": "-Any other person sees the entire TAX_ID as masked, for example, XXX-XX-XXXX.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.67160034179688, 397.85516357421875, 530.060302734375, 407.97796630859375], "page": 43, "span": [0, 82], "__ref_s3_data": null}], 
"text": "To implement this column mask, run the SQL statement that is shown in Example 3-9.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.63172912597656, 377.08172607421875, 352.5632629394531, 386.8071594238281], "page": 43, "span": [0, 48], "__ref_s3_data": null}], "text": "Example 3-9 Creating a mask on the TAX_ID column", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.53025817871094, 104.04078674316406, 545.3026123046875, 374.6419372558594], "page": 43, "span": [0, 590], "__ref_s3_data": null}], "text": "CREATE MASK HR_SCHEMA.MASK_TAX_ID_ON_EMPLOYEES ON HR_SCHEMA.EMPLOYEES AS EMPLOYEES FOR COLUMN TAX_ID RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR' ) = 1 THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . TAX_ID WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( EMPLOYEES . TAX_ID , 8 , 4 ) ) WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'EMP' ) = 1 THEN EMPLOYEES . TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [344.6368103027344, 27.94813346862793, 523.6016235351562, 37.276817321777344], "page": 43, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.412353515625, 27.93828010559082, 547.2591552734375, 37.551448822021484], "page": 43, "span": [0, 2], "__ref_s3_data": null}], "text": "27", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.16213989257812, 710.7819213867188, 449.952392578125, 721.4285278320312], "page": 44, "span": [0, 65], "__ref_s3_data": null}], "text": "3. 
Figure 3-10 shows the masks that are created in the HR_SCHEMA.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.32794189453125, 609.9722290039062, 294.5016784667969, 619.325927734375], "page": 44, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3-10 Column masks shown in System i Navigator", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/28"}, {"prov": [{"bbox": [64.31277465820312, 576.8663940429688, 203.98521423339844, 590.3197021484375], "page": 44, "span": [0, 21], "__ref_s3_data": null}], "text": "3.6.6 Activating RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.171875, 529.45751953125, 547.2256469726562, 563.8794555664062], "page": 44, "span": [0, 265], "__ref_s3_data": null}], "text": "Now that you have created the row permission and the two column masks, RCAC must be activated. The row permission and the two column masks are enabled (last clause in the scripts), but now you must activate RCAC on the table. To do so, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.7984619140625, 513.2022094726562, 409.4788818359375, 523.4619750976562], "page": 44, "span": [0, 57], "__ref_s3_data": null}], "text": "1. 
Run the SQL statements that are shown in Example 3-10.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.4545135498047, 491.461669921875, 375.2909851074219, 501.6716613769531], "page": 44, "span": [0, 51], "__ref_s3_data": null}], "text": "Example 3-10 Activating RCAC on the EMPLOYEES table", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.58570861816406, 426.5678405761719, 376.8215637207031, 485.00579833984375], "page": 44, "span": [0, 180], "__ref_s3_data": null}], "text": "/* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.21578979492188, 378.27978515625, 540.8014526367188, 412.595458984375], "page": 44, "span": [0, 231], "__ref_s3_data": null}], "text": "2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. 
To do this, from the main navigation pane of System i Navigator, click Schemas \uf0ae HR_SCHEMA \uf0ae Tables , right-click the EMPLOYEES table, and click Definition .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.287353515625, 134.46551513671875, 348.3514404296875, 144.03317260742188], "page": 44, "span": [0, 65], "__ref_s3_data": null}], "text": "Figure 3-11 Selecting the EMPLOYEES table from System i Navigator", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/29"}, {"prov": [{"bbox": [63.94425964355469, 27.93828010559082, 78.4020004272461, 37.58649444580078], "page": 44, "span": [0, 2], "__ref_s3_data": null}], "text": "28", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39510345458984, 27.71799087524414, 334.4214172363281, 37.344871520996094], "page": 44, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.1829071044922, 699.0615844726562, 531.1966552734375, 721.4234619140625], "page": 45, "span": [0, 152], "__ref_s3_data": null}], "text": "3. The EMPLOYEES table definition is displayed, as shown in Figure 3-12. 
Note that the Row access control and Column access control options are checked.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.24822998046875, 441.8091125488281, 356.59588623046875, 451.58001708984375], "page": 45, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 3-12 RCAC enabled on the EMPLOYEES table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/30"}, {"prov": [{"bbox": [64.26148223876953, 409.7285461425781, 340.0064392089844, 422.9130554199219], "page": 45, "span": [0, 41], "__ref_s3_data": null}], "text": "3.6.7 Demonstrating data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.89842224121094, 374.4912109375, 547.259521484375, 396.6586608886719], "page": 45, "span": [0, 100], "__ref_s3_data": null}], "text": "You are now ready to start testing RCAC with the four different users. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 332.77825927734375, 544.1072387695312, 367.4451599121094], "page": 45, "span": [0, 243], "__ref_s3_data": null}], "text": "1. The first SQL statement that is shown in Example 3-11 illustrates the EMPLOYEE count. 
You know that there are 42 rows from the query that was run before RCAC was put in place (see 3.6.3, \"Demonstrating data access without RCAC\" on page 24).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 312.35699462890625, 279.0827941894531, 320.6820068359375], "page": 45, "span": [0, 28], "__ref_s3_data": null}], "text": "Example 3-11 EMPLOYEES count", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 294.8070983886719, 406.6163635253906, 303.5818786621094], "page": 45, "span": [0, 54], "__ref_s3_data": null}], "text": "SELECT COUNT(*) as ROW_COUNT FROM HR_SCHEMA.EMPLOYEES;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2246551513672, 258.3946533203125, 511.5380859375, 280.7793273925781], "page": 45, "span": [0, 146], "__ref_s3_data": null}], "text": "2. The result of the query for a user that belongs to the HR group profile is shown in Figure 3-13. This user can see all the 42 rows (employees).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.48751831054688, 196.33412170410156, 309.8641662597656, 206.0880889892578], "page": 45, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-13 Count of EMPLOYEES by HR", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/31"}, {"prov": [{"bbox": [136.1068572998047, 145.77272033691406, 540.7218627929688, 180.23095703125], "page": 45, "span": [0, 216], "__ref_s3_data": null}], "text": "3. The result of the same query for a user who is logged on as TQSPENSER (Manager) is shown in Figure 3-14. 
TQSPENSER has five employees in his department and he can also see his own row, which is why the count is 6.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0082244873047, 84.47093200683594, 340.1214904785156, 94.05392456054688], "page": 45, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 3-14 Count of EMPLOYEES by a manager", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/32"}, {"prov": [{"bbox": [344.5658264160156, 27.9404296875, 523.6016235351562, 37.236785888671875], "page": 45, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5850830078125, 27.93828010559082, 547.2591552734375, 37.54125213623047], "page": 45, "span": [0, 2], "__ref_s3_data": null}], "text": "29", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.2293701171875, 27.93828010559082, 78.4020004272461, 37.64487838745117], "page": 46, "span": [0, 2], "__ref_s3_data": null}], "text": "30", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41975402832031, 27.68392562866211, 334.4214172363281, 37.348270416259766], "page": 46, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.95506286621094, 698.8024291992188, 547.213623046875, 721.3870849609375], "page": 46, "span": [0, 169], "__ref_s3_data": null}], "text": "4. The result of the same query that is run by an employee (DSSMITH) gives the result that is shown in Figure 3-15. 
Each employee can see only his or her own data (row).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.09197998046875, 637.8728637695312, 347.52752685546875, 647.5279541015625], "page": 46, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 3-15 Count of EMPLOYEES by an employee", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/33"}, {"prov": [{"bbox": [136.4210205078125, 586.8585205078125, 543.9885864257812, 621.4335327148438], "page": 46, "span": [0, 200], "__ref_s3_data": null}], "text": "5. The result of the same query that is run by the Consultant/DBE gives the result that is shown in Figure 3-16. The consultants/DBE can manage and implement RCAC, but they do not see any rows at all.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.2426300048828, 525.92822265625, 345.4479675292969, 535.4974365234375], "page": 46, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 3-16 Count of EMPLOYEES by a consultant", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/34"}, {"prov": [{"bbox": [151.1999969482422, 499.5382995605469, 456.2101135253906, 509.35888671875], "page": 46, "span": [0, 65], "__ref_s3_data": null}], "text": "Does the result make sense? Yes, it does because RCAC is enabled.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.42527770996094, 458.2580261230469, 544.2874145507812, 492.674072265625], "page": 46, "span": [0, 262], "__ref_s3_data": null}], "text": "6. Run queries against the EMPLOYEES table. The query that is used in this example runs and tests with the four different user profiles and is the same query that was run in 3.6.3, \"Demonstrating data access without RCAC\" on page 24. 
It is shown in Example 3-12.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 437.3970031738281, 396.18621826171875, 445.7220153808594], "page": 46, "span": [0, 62], "__ref_s3_data": null}], "text": "Example 3-12 SELECT statement to test with the different users", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 334.8448181152344, 266.6982727050781, 429.0736999511719], "page": 46, "span": [0, 124], "__ref_s3_data": null}], "text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.98915100097656, 699.0184936523438, 547.152587890625, 721.4612426757812], "page": 47, "span": [0, 145], "__ref_s3_data": null}], "text": "7. Figure 3-17 shows the results of the query for a Human Resources (VGLUCCHESS) user profile. The user can see all the rows and all the columns.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.40335845947266, 295.6847839355469, 338.4682312011719, 305.3370666503906], "page": 47, "span": [0, 64], "__ref_s3_data": null}], "text": "Figure 3-17 SQL statement result by Human Resources user profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.15513610839844, 256.77691650390625, 546.0484008789062, 279.5661315917969], "page": 47, "span": [0, 139], "__ref_s3_data": null}], "text": "8. Figure 3-18 shows the results of the same query for the Manager (TQSPENSER). 
Notice the masking of the DATE_OF_BIRTH and TAX_ID columns.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.69406127929688, 164.74615478515625, 279.8969421386719, 174.571044921875], "page": 47, "span": [0, 51], "__ref_s3_data": null}], "text": "Figure 3-18 SQL statement result by Manager profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.2742919921875, 126.19644927978516, 518.9005737304688, 148.72137451171875], "page": 47, "span": [0, 145], "__ref_s3_data": null}], "text": "9. Figure 3-19 shows the results of the same query for an employee (DSSMITH). The employee can only see only his own data with no masking at all.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.48681640625, 76.97213745117188, 295.5399169921875, 86.59312438964844], "page": 47, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 3-19 SQL statement result by an employee profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [344.598876953125, 27.90372085571289, 523.6016235351562, 37.23920440673828], "page": 47, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6412963867188, 27.93828010559082, 547.2591552734375, 37.40355682373047], "page": 47, "span": [0, 2], "__ref_s3_data": null}], "text": "31", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.32125091552734, 27.93828010559082, 78.4020004272461, 37.604923248291016], "page": 48, "span": [0, 2], "__ref_s3_data": null}], "text": "32", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.700542449951172, 334.4214172363281, 37.34117126464844], "page": 48, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.8000030517578, 699.1849975585938, 547.2752685546875, 721.3404541015625], "page": 48, "span": [0, 117], "__ref_s3_data": null}], "text": "10.Figure 3-20 shows the results of the same query for the Consultant/DBE, who is not one of the company's employees.", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.52674102783203, 617.0689697265625, 307.95556640625, 626.601806640625], "page": 48, "span": [0, 58], "__ref_s3_data": null}], "text": "Figure 3-20 SQL statement result by Consultant/DBE profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.2658462524414, 585.1321411132812, 409.0855407714844, 598.2813110351562], "page": 48, "span": [0, 52], "__ref_s3_data": null}], "text": "3.6.8 Demonstrating data access with a view and RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.6865692138672, 561.75830078125, 515.0767822265625, 572.1253662109375], "page": 48, "span": [0, 83], "__ref_s3_data": null}], "text": "This section covers data access with a view and RCAC. 
Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.65882873535156, 520.366943359375, 547.2307739257812, 554.7566528320312], "page": 48, "span": [0, 227], "__ref_s3_data": null}], "text": "1. The EMPLOYEES table has a column that is called On_Leave_Flag (Figure 3-21 on page 33) indicating that the employee is on Leave of Absence. For this purpose, a view is created that lists only the employees that are on leave.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.60844421386719, 215.6061248779297, 198.87405395507812, 225.38070678710938], "page": 49, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 3-21 Employees on leave", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.15798950195312, 188.87811279296875, 355.6940002441406, 199.41677856445312], "page": 49, "span": [0, 49], "__ref_s3_data": null}], "text": "2. Example 3-13 shows the definition of the view.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.8000030517578, 167.15521240234375, 311.72705078125, 177.46084594726562], "page": 49, "span": [0, 39], "__ref_s3_data": null}], "text": "Example 3-13 VIew of employees on leave", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.8000030517578, 66.62857055664062, 426.59613037109375, 159.4019775390625], "page": 49, "span": [0, 159], "__ref_s3_data": null}], "text": "CREATE VIEW HR_SCHEMA.EMPLOYEES_ON_LEAVE (EMPLOYEE_ID, FIRST_NAME, MIDDLE_INITIAL, LAST_NAME, WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH,", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [344.6611328125, 27.86074447631836, 523.6016235351562, 37.31344985961914], "page": 49, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7655029296875, 27.93828010559082, 547.2591552734375, 37.37984085083008], "page": 49, "span": [0, 2], "__ref_s3_data": null}], "text": "33", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [258.8564758300781, 674.603759765625, 446.6356201171875, 720.6338500976562], "page": 50, "span": [0, 53], "__ref_s3_data": null}], "text": "TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG )", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.84320068359375, 495.2419738769531, 271.8138427734375, 674.0155639648438], "page": 50, "span": [0, 233], "__ref_s3_data": null}], "text": "AS SELECT EMPLOYEE_ID, FIRST_NAME , MIDDLE_INITIAL, LAST_NAME , WORK_DEPARTMENT, PHONE_EXTENSION, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE, ON_LEAVE_FLAG FROM HR_SCHEMA.EMPLOYEES WHERE ON_LEAVE_FLAG = 'Y';", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.26315307617188, 454.30218505859375, 547.3662109375, 476.8594665527344], "page": 50, "span": [0, 115], "__ref_s3_data": null}], "text": "3. Use the view to query the data and see who is on leave. 
The SQL statement that is used is shown in Example 3-14:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 433.1369934082031, 353.97808837890625, 441.4620056152344], "page": 50, "span": [0, 49], "__ref_s3_data": null}], "text": "Example 3-14 SQL statement for employees on leave", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.48304748535156, 330.2506103515625, 316.67755126953125, 426.0392761230469], "page": 50, "span": [0, 134], "__ref_s3_data": null}], "text": "SELECT EMPLOYEE_ID, LAST_NAME, JOB_DESCRIPTION, DATE_OF_BIRTH, TAX_ID, USER_ID, MANAGER_OF_EMPLOYEE FROM HR_SCHEMA.EMPLOYEES_ON_LEAVE;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.9942169189453, 270.6485595703125, 547.2506713867188, 317.7777404785156], "page": 50, "span": [0, 264], "__ref_s3_data": null}], "text": "4. Start with the Human Resources person (VGLUCCHESS) and see what is the result of the previous query. He sees the two employees that are on leave and no masking is done over the DATE_OF_BIRTH and TAX_ID columns. The results of the query are shown in Figure 3-22.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.3441390991211, 212.0773162841797, 302.135009765625, 221.99578857421875], "page": 50, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 3-22 Employees on leave - Human Resources user", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.1876220703125, 149.55426025390625, 546.509521484375, 196.12924194335938], "page": 50, "span": [0, 285], "__ref_s3_data": null}], "text": "5. Figure 3-23 shows what the Manager (TQSPENSER) gets when he runs the same query over the view. He sees only the employees that are on leave that are managed by him. In this example, it is one employee. 
The columns are masked, which confirms that RCAC is applied to the view as well.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.43692779541016, 98.1999740600586, 314.1068115234375, 108.11901092529297], "page": 50, "span": [0, 58], "__ref_s3_data": null}], "text": "Figure 3-23 Employee on leave - Manager of Field Reps user", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.26634979248047, 27.93828010559082, 78.4020004272461, 37.68272018432617], "page": 50, "span": [0, 2], "__ref_s3_data": null}], "text": "34", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.34017944335938, 27.696313858032227, 334.4214172363281, 37.3791618347168], "page": 50, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.16725158691406, 698.8943481445312, 536.1886596679688, 721.2736206054688], "page": 51, "span": [0, 170], "__ref_s3_data": null}], "text": "6. Figure 3-24 shows what the employee (DSSMITH) gets when he runs the same query over the view. The employee gets an empty set or he gets only himself if he is on leave.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.79974365234375, 684.2783813476562, 67.5686264038086, 693.4913940429688], "page": 51, "span": [0, 1], "__ref_s3_data": null}], "text": ".", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.52584838867188, 626.8951416015625, 265.8390808105469, 636.5194702148438], "page": 51, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 3-24 Employees on leave - employee user", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [344.75775146484375, 27.914899826049805, 523.6016235351562, 37.27613830566406], "page": 51, "span": [0, 40], "__ref_s3_data": null}], "text": "Chapter 3. 
Row and Column Access Control", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7324829101562, 27.93828010559082, 547.2591552734375, 37.47168731689453], "page": 51, "span": [0, 2], "__ref_s3_data": null}], "text": "35", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.37136840820312, 27.93828010559082, 78.4020004272461, 37.52334976196289], "page": 52, "span": [0, 2], "__ref_s3_data": null}], "text": "36", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.71300506591797, 334.534423828125, 37.346683502197266], "page": 52, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/35"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 53, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 4.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 699.2093505859375], "page": 53, "span": [0, 1], "__ref_s3_data": null}], "text": "4", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.44911193847656, 451.1016845703125, 532.0337524414062, 538.6605224609375], "page": 53, "span": [0, 59], "__ref_s3_data": null}], "text": "Implementing Row and Column Access Control: Banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.91500854492188, 367.058837890625, 546.1500854492188, 413.4091491699219], "page": 53, "span": [0, 289], "__ref_s3_data": null}], "text": "This chapter illustrates the Row and Column Access Control (RCAC) concepts using a banking example. 
Appendix A, \"Database definitions for the RCAC banking example\" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.71763610839844, 344.9483642578125, 347.4121398925781, 355.50213623046875], "page": 53, "span": [0, 49], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.78599548339844, 327.7929992675781, 393.0647888183594, 338.13604736328125], "page": 53, "span": [0, 67], "__ref_s3_data": null}], "text": "GLYPH Business requirements for the RCAC banking scenario", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.63722229003906, 315.72662353515625, 368.54632568359375, 326.46844482421875], "page": 53, "span": [0, 67], "__ref_s3_data": null}], "text": "GLYPH Description of the users roles and responsibilities", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.69590759277344, 303.98077392578125, 261.51287841796875, 314.0971984863281], "page": 53, "span": [0, 38], "__ref_s3_data": null}], "text": "GLYPH Implementation of RCAC", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.962242126464844, 27.78060531616211, 257.24334716796875, 37.377197265625], "page": 53, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5948486328125, 27.93828010559082, 547.2591552734375, 37.74201202392578], "page": 53, "span": [0, 2], "__ref_s3_data": null}], "text": "37", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.24122619628906, 702.5103149414062, 512.5513916015625, 718.264892578125], "page": 54, "span": [0, 55], "__ref_s3_data": null}], "text": "4.1 Business requirements for the RCAC banking scenario", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.84820556640625, 663.7042236328125, 543.9003295898438, 685.9572143554688], "page": 54, "span": [0, 189], "__ref_s3_data": null}], "text": "As part of a new internet banking project, the Bank decides to raise the level of data access control on the following three tables that are involved in the new customer-facing application:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.72608947753906, 647.138916015625, 214.6067352294922, 657.6016845703125], "page": 54, "span": [0, 25], "__ref_s3_data": null}], "text": "GLYPH CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6984405517578, 635.1390991210938, 206.64071655273438, 645.5955810546875], "page": 54, "span": [0, 24], "__ref_s3_data": null}], "text": "GLYPH ACCOUNTS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5414581298828, 623.1392822265625, 229.18223571777344, 633.65625], "page": 54, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH TRANSACTIONS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.95913696289062, 541.180419921875, 547.2745971679688, 611.2424926757812], "page": 54, "span": [0, 471], "__ref_s3_data": null}], "text": "RCAC will be used to restrict access to the rows in these three tables by using permissions, and to restrict column values by using 
masks. The default position is that no user can access the rows in the tables. From there, specific bank employees are allowed access only to the rows for their job responsibilities. In addition, columns containing personal or sensitive data are masked appropriately. Bank customers are allowed access to only their rows and column values.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9995574951172, 459.0948486328125, 546.8505859375, 529.453857421875], "page": 54, "span": [0, 469], "__ref_s3_data": null}], "text": "In this example, it is assumed that the Bank employees have access to the tables when working on the premises only. Employee access to data is provided by programs and tools using standard DB2 interfaces, such as embedded SQL, ODBC, JDBC, and CLI. The database connection authentication for these interfaces uses the employee's personal and unique IBM i user profile. Operating in their professional role, employees do not have access to bank data through the Internet.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.01345825195312, 365.1432800292969, 547.270751953125, 447.330810546875], "page": 54, "span": [0, 571], "__ref_s3_data": null}], "text": "Bank customers have access to their accounts and transactions by using a new web application. Each customer has unique credentials for logging in to the application. The authentication of the customer is handled by the web server. After the customer is authenticated, the web server establishes a connection to DB2 for data access. This connection uses a common IBM i user profile that is known as WEBUSER. This user profile is secured and is used only by the web application. 
No Bank employee has access to the WEBUSER profile, and no customer has an IBM i user profile.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.03805541992188, 295.1842041015625, 547.2139892578125, 353.35540771484375], "page": 54, "span": [0, 350], "__ref_s3_data": null}], "text": "The customer's identity is passed to DB2 by using a global variable. The global variable is secured and can be accessed only by the WEBUSER. The web application sets the CUSTOMER_LOGIN_ID variable to the customer's login value. This value is compared to the customer's login value that is found in the CUSTOMER_LOGIN_ID column of the CUSTOMERS table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.91661071777344, 261.1648254394531, 547.2429809570312, 283.2774963378906], "page": 54, "span": [0, 120], "__ref_s3_data": null}], "text": "Applications that do not use the web interface do not have to be changed because the global variable is NULL by default.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.26496124267578, 27.93828010559082, 78.4020004272461, 37.504844665527344], "page": 54, "span": [0, 2], "__ref_s3_data": null}], "text": "38", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41310119628906, 27.687854766845703, 334.4214172363281, 37.32243728637695], "page": 54, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.6597900390625, 710.8218994140625, 442.2786865234375, 721.31201171875], "page": 55, "span": [0, 70], "__ref_s3_data": null}], "text": "A diagram of the internet banking architecture is shown in Figure 4-1:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.56939697265625, 670.2985229492188, 547.3546142578125, 704.505615234375], "page": 55, "span": [0, 200], "__ref_s3_data": null}], 
"text": "GLYPH The row permission and column masks for the CUSTOMERS table are based on the group of which the user profile is part. If the user is a customer, their specific login ID also is tested.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.55992126464844, 628.9960327148438, 546.7332153320312, 663.4683227539062], "page": 55, "span": [0, 203], "__ref_s3_data": null}], "text": "GLYPH The row permission and column mask for the ACCOUNTS table are based on the CUSTOMERS table permission rules. A subquery is used to connect the accounts (child) with the customer (parent).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.64488220214844, 576.1607055664062, 546.2234497070312, 622.9520874023438], "page": 55, "span": [0, 284], "__ref_s3_data": null}], "text": "GLYPH The row permission for the TRANSACTIONS table is based on the ACCOUNTS table permission rules and the CUSTOMERS table permission rules. A subquery is used to connect the transactions (child) with the account (parent) and the account (child) with the customer (parent).", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3312530517578, 293.15283203125, 286.0867919921875, 302.4555969238281], "page": 55, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-1 Internet banking example", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/36"}, {"prov": [{"bbox": [64.38951110839844, 250.45684814453125, 475.6933898925781, 266.34906005859375], "page": 55, "span": [0, 55], "__ref_s3_data": null}], "text": "4.2 Description of the users roles and responsibilities", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.03475952148438, 211.77857971191406, 533.134521484375, 234.10536193847656], "page": 55, "span": [0, 99], "__ref_s3_data": null}], "text": "During the requirements gathering phase, the following groups of 
users are identified and codified:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5544891357422, 194.29025268554688, 395.1046142578125, 204.80311584472656], "page": 55, "span": [0, 70], "__ref_s3_data": null}], "text": "GLYPH SECURITY: Security officer and security administrators", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56268310546875, 182.1162872314453, 266.7660217285156, 192.73492431640625], "page": 55, "span": [0, 39], "__ref_s3_data": null}], "text": "GLYPH DBE: Database engineers", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.47203063964844, 170.73941040039062, 319.29107666015625, 180.6303253173828], "page": 55, "span": [0, 51], "__ref_s3_data": null}], "text": "GLYPH ADMIN: Bank business administrators", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.59524536132812, 158.73960876464844, 246.766357421875, 168.96905517578125], "page": 55, "span": [0, 36], "__ref_s3_data": null}], "text": "GLYPH TELLER: Bank tellers", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.43141174316406, 146.72010803222656, 365.2953796386719, 156.56028747558594], "page": 55, "span": [0, 59], "__ref_s3_data": null}], "text": "GLYPH CUSTOMER: Bank customers using the internet", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4724578857422, 134.33944702148438, 325.77801513671875, 144.67828369140625], "page": 55, "span": [0, 53], "__ref_s3_data": null}], "text": "GLYPH PUBLIC: Anyone not already in a group", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [214.38104248046875, 27.994510650634766, 523.5935668945312, 37.23670959472656], "page": 55, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6470336914062, 27.93828010559082, 547.2591552734375, 37.60462188720703], "page": 55, "span": [0, 2], "__ref_s3_data": null}], "text": "39", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.39681243896484, 27.93828010559082, 78.4020004272461, 37.67327880859375], "page": 56, "span": [0, 2], "__ref_s3_data": null}], "text": "40", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.661266326904297, 334.4214172363281, 37.33354187011719], "page": 56, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.121826171875, 687.2786865234375, 547.2127075195312, 721.305419921875], "page": 56, "span": [0, 231], "__ref_s3_data": null}], "text": "Based on their respective roles and responsibilities, the users (that is, a group) are controlled by row permissions and column masks. 
The chart that is shown in Figure 4-2 shows the rules for row and column access in this example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3048095703125, 373.78326416015625, 317.6307067871094, 383.28173828125], "page": 56, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 4-2 Rules for row and column access", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/12"}, {"prov": [{"bbox": [135.89483642578125, 699.15673828125, 545.2960205078125, 721.2871704101562], "page": 57, "span": [0, 123], "__ref_s3_data": null}], "text": "The chart that is shown in Figure 4-3 shows the column access that is allowed by group and lists the column masks by table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1103973388672, 381.31884765625, 245.0207977294922, 390.2972106933594], "page": 57, "span": [0, 23], "__ref_s3_data": null}], "text": "Figure 4-3 Column masks", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/13"}, {"prov": [{"bbox": [136.20828247070312, 318.3379211425781, 543.4578247070312, 364.5528869628906], "page": 57, "span": [0, 295], "__ref_s3_data": null}], "text": "For the demonstration and testing of RCAC in this example, the following users interact with the database. Furthermore, the column masking rules are developed independently of the row permissions. If a person does not have permission to access the row, the column mask processing does not occur.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.60292053222656, 289.3758239746094, 547.2935791015625, 312.2777404785156], "page": 57, "span": [0, 127], "__ref_s3_data": null}], "text": "GLYPH Hernando Bedoya is a DB2 for i database engineer with the user profile of HBEDOYA. 
He is part of the DBE group.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6302490234375, 260.23626708984375, 538.9269409179688, 282.8565979003906], "page": 57, "span": [0, 119], "__ref_s3_data": null}], "text": "GLYPH Mike Cain is a DB2 for i database engineer with the user profile of MCAIN. He is part of the DBE group.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5699005126953, 231.489501953125, 492.53729248046875, 254.11923217773438], "page": 57, "span": [0, 137], "__ref_s3_data": null}], "text": "GLYPH Veronica G. Lucchess is a bank account administrator with the user profile of VGLUCCHESS. She is part of the ADMIN group.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56982421875, 201.81741333007812, 534.6511840820312, 225.0314178466797], "page": 57, "span": [0, 115], "__ref_s3_data": null}], "text": "GLYPH Tom Q. Spenser is a bank teller with the user profile of TQSPENSER. He is part of the TELLER group.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4237060546875, 185.3791046142578, 529.1213989257812, 196.2005157470703], "page": 57, "span": [0, 103], "__ref_s3_data": null}], "text": "GLYPH The IT security officer has the user profile of SECURITY. She is not part of any group.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4558868408203, 144.6416473388672, 547.3323364257812, 178.86013793945312], "page": 57, "span": [0, 200], "__ref_s3_data": null}], "text": "GLYPH The online banking web application uses the user profile WEBUSER. This profile is part of the CUSTOMER group. Any future customer-facing applications can also use this group if needed.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.58990478515625, 127.630859375, 530.7957763671875, 138.04251098632812], "page": 57, "span": [0, 96], "__ref_s3_data": null}], "text": "GLYPH Adam O. 
Olsen is a bank customer with a web application login ID of KLD72CQR8JG.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [214.45973205566406, 27.949493408203125, 523.5935668945312, 37.27492904663086], "page": 57, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5521850585938, 27.93828010559082, 547.2591552734375, 37.53369903564453], "page": 57, "span": [0, 2], "__ref_s3_data": null}], "text": "41", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.40861511230469, 705.659912109375, 283.6307678222656, 721.7451171875], "page": 58, "span": [0, 26], "__ref_s3_data": null}], "text": "4.3 Implementation of RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.34254455566406, 679.0490112304688, 514.2452392578125, 689.4026489257812], "page": 58, "span": [0, 85], "__ref_s3_data": null}], "text": "Figure 4-4 shows the data model of the banking scenario that is used in this example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0010528564453, 478.4273681640625, 326.9934387207031, 488.0740966796875], "page": 58, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-4 Data model of the banking scenario", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/37"}, {"prov": [{"bbox": [136.00115966796875, 451.8724060058594, 309.19659423828125, 462.0606994628906], "page": 58, "span": [0, 40], "__ref_s3_data": null}], "text": "This section covers the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.69436645507812, 435.15850830078125, 372.9923095703125, 445.3446044921875], "page": 58, "span": [0, 66], "__ref_s3_data": null}], "text": "GLYPH Reviewing the tables that are used in this 
example", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4688720703125, 422.6627502441406, 490.6497802734375, 433.2422180175781], "page": 58, "span": [0, 84], "__ref_s3_data": null}], "text": "GLYPH Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56883239746094, 410.80712890625, 376.5571594238281, 421.4914855957031], "page": 58, "span": [0, 69], "__ref_s3_data": null}], "text": "GLYPH Creating group profiles for the users and their roles", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.65866088867188, 398.6716003417969, 384.3678283691406, 409.8951721191406], "page": 58, "span": [0, 62], "__ref_s3_data": null}], "text": "GLYPH Creating the CUSTOMER_LOGIN_ID global variable", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.79405212402344, 386.6734313964844, 320.787353515625, 397.24853515625], "page": 58, "span": [0, 53], "__ref_s3_data": null}], "text": "GLYPH Defining and creating row permissions", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.57369995117188, 374.905517578125, 312.29620361328125, 385.2193908691406], "page": 58, "span": [0, 50], "__ref_s3_data": null}], "text": "GLYPH Defining and creating column masks", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5524444580078, 362.90753173828125, 387.68585205078125, 373.3866882324219], "page": 58, "span": [0, 69], "__ref_s3_data": null}], "text": "GLYPH Restricting the inserting and updating of masked data", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.58221435546875, 351.0097351074219, 334.40216064453125, 361.2847900390625], "page": 58, "span": [0, 56], "__ref_s3_data": null}], "text": "GLYPH Activating row and column access control", "type": "paragraph", "name": "List-item", "font": null}, {"prov": 
[{"bbox": [135.76596069335938, 338.75787353515625, 271.91436767578125, 349.0521240234375], "page": 58, "span": [0, 41], "__ref_s3_data": null}], "text": "GLYPH Reviewing row permissions", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.70089721679688, 327.16015625, 323.4725036621094, 337.5362854003906], "page": 58, "span": [0, 51], "__ref_s3_data": null}], "text": "GLYPH Demonstrating data access with RCAC", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4153289794922, 315.0921630859375, 343.3009948730469, 325.6691589355469], "page": 58, "span": [0, 56], "__ref_s3_data": null}], "text": "GLYPH Query implementation with RCAC activated", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.28388214111328, 282.4974060058594, 410.4787292480469, 295.5218505859375], "page": 58, "span": [0, 56], "__ref_s3_data": null}], "text": "4.3.1 Reviewing the tables that are used in this example", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.78396606445312, 235.11878967285156, 541.1093139648438, 269.3280334472656], "page": 58, "span": [0, 214], "__ref_s3_data": null}], "text": "This section reviews the tables that are used in this example. As shown in Figure 4-5, there are three main tables that are involved in the data model: CUSTOMERS, ACCOUNTS, and TRANSACTIONS. 
There are 90 customers.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.12417602539062, 151.3236541748047, 360.94549560546875, 160.32456970214844], "page": 58, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 4-5 Tables that are used in the banking example", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [142.4352264404297, 89.66874694824219, 525.7510986328125, 124.07282257080078], "page": 58, "span": [0, 195], "__ref_s3_data": null}], "text": "Note: Appendix A, \"Database definitions for the RCAC banking example\" on page 121 provides a script that you can use to create all the database definitions or DDLs to re-create this RCAC example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.37181854248047, 27.93828010559082, 78.4020004272461, 37.574485778808594], "page": 58, "span": [0, 2], "__ref_s3_data": null}], "text": "42", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.711872100830078, 334.43927001953125, 37.300533294677734], "page": 58, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.89007568359375, 698.94091796875, 525.0703125, 721.3560791015625], "page": 59, "span": [0, 106], "__ref_s3_data": null}], "text": "To review the attributes of each table that is used in this banking example, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.79959106445312, 670.1790161132812, 543.2303466796875, 694.369873046875], "page": 59, "span": [0, 136], "__ref_s3_data": null}], "text": "1. Review the columns of each the tables through System i Navigator. 
Expand Database \uf0ae named Database \uf0ae Schemas \uf0ae BANK_SCHEMA \uf0ae Tables .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.16558837890625, 629.1010131835938, 546.8358764648438, 663.4356079101562], "page": 59, "span": [0, 253], "__ref_s3_data": null}], "text": "2. Right-click the CUSTOMERS table and select Definition . Figure 4-6 shows the attributes for the CUSTOMERS table. The Row access control and Column access control options are not selected, which indicates that the table does not have RCAC implemented.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.14613342285156, 418.0169982910156, 303.8804626464844, 427.2879943847656], "page": 59, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-6 CUSTOMERS table attributes", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/38"}, {"prov": [{"bbox": [135.9766082763672, 378.66107177734375, 517.3616333007812, 401.477294921875], "page": 59, "span": [0, 91], "__ref_s3_data": null}], "text": "3. Click the Columns tab to see the columns of the CUSTOMERS table, as shown in Figure 4-7.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.3719711303711, 169.74649047851562, 294.08258056640625, 179.3585662841797], "page": 59, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 4-7 Column definitions of the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [214.48805236816406, 27.967016220092773, 523.5935668945312, 37.22077178955078], "page": 59, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6427001953125, 27.93828010559082, 547.2591552734375, 37.57048034667969], "page": 59, "span": [0, 2], "__ref_s3_data": null}], "text": "43", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.2916488647461, 27.93828010559082, 78.4020004272461, 37.80024719238281], "page": 60, "span": [0, 2], "__ref_s3_data": null}], "text": "44", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41600799560547, 27.63385009765625, 334.4485168457031, 37.373573303222656], "page": 60, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.0037384033203, 675.2788696289062, 538.2010498046875, 721.4434204101562], "page": 60, "span": [0, 263], "__ref_s3_data": null}], "text": "4. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the CUSTOMERS table, as shown in Figure 4-8. There are no Foreign Key Constraints or Check Constraints on the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.43617248535156, 473.768798828125, 396.242431640625, 483.36297607421875], "page": 60, "span": [0, 59], "__ref_s3_data": null}], "text": "Figure 4-8 Reviewing the constraints on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/39"}, {"prov": [{"bbox": [136.3007049560547, 434.6003723144531, 542.1918334960938, 457.54901123046875], "page": 60, "span": [0, 154], "__ref_s3_data": null}], "text": "5. Review the definition of the ACCOUNTS table. The definition of the ACCOUNTS table is shown in Figure 4-9. 
RCAC has not been defined for this table yet.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3052520751953, 219.217041015625, 297.04034423828125, 228.72657775878906], "page": 60, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 4-9 ACCOUNTS table attributes", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/40"}, {"prov": [{"bbox": [135.99215698242188, 698.8906860351562, 509.6353759765625, 721.5469970703125], "page": 61, "span": [0, 91], "__ref_s3_data": null}], "text": "6. Click the Columns tab to see the columns of the ACCOUNTS table, as shown in Figure 4-10.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.3238754272461, 545.0313110351562, 291.8764343261719, 554.8672485351562], "page": 61, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 4-10 Column definitions of the ACCOUNTS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [135.97557067871094, 482.25897216796875, 538.2010498046875, 528.322021484375], "page": 61, "span": [0, 265], "__ref_s3_data": null}], "text": "7. Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the ACCOUNTS table, as shown in Figure 4-11. There is one Foreign Key Constraint and no Check Constraints on the ACCOUNTS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.44501495361328, 294.8391418457031, 322.40972900390625, 304.5721740722656], "page": 61, "span": [0, 59], "__ref_s3_data": null}], "text": "Figure 4-11 Reviewing the constraints on the ACCOUNTS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [214.4091796875, 27.91837501525879, 523.5935668945312, 37.27334213256836], "page": 61, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.630126953125, 27.93828010559082, 547.2591552734375, 37.63974380493164], "page": 61, "span": [0, 2], "__ref_s3_data": null}], "text": "45", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.2132110595703, 698.5640869140625, 547.2595825195312, 721.408447265625], "page": 62, "span": [0, 157], "__ref_s3_data": null}], "text": "8. Review the definition of the TRANSACTIONS table. The definition of the TRANSACTIONS table is shown in Figure 4-12. RCAC is not defined for this table yet.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.18008422851562, 483.1399230957031, 322.06451416015625, 492.7385559082031], "page": 62, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-12 TRANSACTIONS table attributes", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/41"}, {"prov": [{"bbox": [136.16123962402344, 443.9065856933594, 531.8204345703125, 466.9815979003906], "page": 62, "span": [0, 95], "__ref_s3_data": null}], "text": "9. Click the Columns tab to see the columns of the TRANSACTIONS table, as shown in Figure 4-13.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.9007568359375, 299.6871032714844, 383.970458984375, 309.5373229980469], "page": 62, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 4-13 Column definitions of the TRANSACTIONS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.8000030517578, 237.45896911621094, 547.3941040039062, 283.4495849609375], "page": 62, "span": [0, 273], "__ref_s3_data": null}], "text": "10.Click the Key Constraints , Foreign Key Constraints , and Check Constraints tabs to review the key, foreign, and check constraints on the TRANSACTIONS table, as shown in Figure 4-14. 
There is one Foreign Key Constraint and one Check Constraint on the TRANSACTIONS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.38268280029297, 57.628116607666016, 342.353271484375, 67.44571685791016], "page": 62, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-14 Reviewing the constraints on the TRANSACTIONS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.2685317993164, 27.93828010559082, 78.4020004272461, 37.63324737548828], "page": 62, "span": [0, 2], "__ref_s3_data": null}], "text": "46", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.30252075195312, 27.62788200378418, 334.4423828125, 37.370174407958984], "page": 62, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.1913299560547, 699.0999755859375, 527.005615234375, 721.2046508789062], "page": 63, "span": [0, 167], "__ref_s3_data": null}], "text": "Now that you have reviewed the database model for this example, the following sections describe the steps that are required to implement RCAC in this banking scenario.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.96824645996094, 650.2410278320312, 532.1195068359375, 679.559814453125], "page": 63, "span": [0, 74], "__ref_s3_data": null}], "text": "4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.59634399414062, 603.2190551757812, 531.9879150390625, 637.3970336914062], "page": 63, "span": [0, 250], "__ref_s3_data": null}], "text": "The first step is to assign the appropriate function usage ID to the Database Engineers (DBEs) that will be implementing RCAC. For a description of function usage IDs, see 2.1, \"Roles\" on page 8. 
In this example, the DBEs are users MCAIN and HBEDOYA.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.24610900878906, 580.7687377929688, 266.8606872558594, 591.4186401367188], "page": 63, "span": [0, 29], "__ref_s3_data": null}], "text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80001831054688, 552.19091796875, 544.5436401367188, 574.6569213867188], "page": 63, "span": [0, 103], "__ref_s3_data": null}], "text": "1. Right-click the database connection and select Application Administration , as shown in Figure 4-15.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0865936279297, 289.0970153808594, 292.8174133300781, 298.57464599609375], "page": 63, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-15 Application administration", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/42"}, {"prov": [{"bbox": [214.49533081054688, 27.961729049682617, 523.5935668945312, 37.22500228881836], "page": 63, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.54150390625, 27.93828010559082, 547.2591552734375, 37.57440948486328], "page": 63, "span": [0, 2], "__ref_s3_data": null}], "text": "47", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.28382873535156, 27.93828010559082, 78.4020004272461, 37.64601135253906], "page": 64, "span": [0, 2], "__ref_s3_data": null}], "text": "48", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.715648651123047, 334.4350891113281, 37.335201263427734], "page": 64, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.07907104492188, 698.89013671875, 530.2109985351562, 723.349853515625], "page": 64, "span": [0, 165], "__ref_s3_data": null}], "text": "2. The Application Administration window opens, as shown in Figure 4-16. Click IBM i \uf0ae Database and select the function usage ID of Database Security Administrator .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.70782470703125, 390.7132568359375, 329.6557312011719, 400.10400390625], "page": 64, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-16 Application administration for IBM i", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/43"}, {"prov": [{"bbox": [136.16891479492188, 351.7818908691406, 544.5723266601562, 374.1484375], "page": 64, "span": [0, 105], "__ref_s3_data": null}], "text": "3. 
Click Customize for the function usage ID of Database Security Administrator, as shown in Figure 4-17.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0600128173828, 169.8534393310547, 459.35382080078125, 179.54444885253906], "page": 64, "span": [0, 77], "__ref_s3_data": null}], "text": "Figure 4-17 Customizing the Database Security Administrator function usage ID", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/44"}, {"prov": [{"bbox": [136.06375122070312, 687.2783203125, 547.1973876953125, 721.346923828125], "page": 65, "span": [0, 187], "__ref_s3_data": null}], "text": "4. The Customize Access window opens, as shown in Figure 4-18. Click the users that need to implement RCAC. For this example, HBEDOYA and MCAIN are selected. Click Add and then click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.98194885253906, 377.9023132324219, 297.5574035644531, 387.4375], "page": 65, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-18 Customize Access window", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/45"}, {"prov": [{"bbox": [136.29513549804688, 326.8274230957031, 537.650146484375, 361.49859619140625], "page": 65, "span": [0, 180], "__ref_s3_data": null}], "text": "5. The Application Administrator window opens again. 
The function usage ID of Database Security Administrator now has an X in the Customized Access column, as shown in Figure 4-19.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.44737243652344, 196.30667114257812, 443.8832092285156, 206.25372314453125], "page": 65, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-19 Function usage ID Database Security Administrator customized", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/46"}, {"prov": [{"bbox": [214.483154296875, 27.942846298217773, 523.5935668945312, 37.24003219604492], "page": 65, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6575317382812, 27.93828010559082, 547.2591552734375, 37.754249572753906], "page": 65, "span": [0, 2], "__ref_s3_data": null}], "text": "49", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.40753936767578, 27.93828010559082, 78.4020004272461, 37.70213317871094], "page": 66, "span": [0, 2], "__ref_s3_data": null}], "text": "50", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.658397674560547, 334.4214172363281, 37.3441162109375], "page": 66, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.18582153320312, 698.6884765625, 545.5703735351562, 721.4199829101562], "page": 66, "span": [0, 117], "__ref_s3_data": null}], "text": "6. Run an SQL query that shows which user profiles are enabled to define RCAC. 
The SQL query is shown in Figure 4-20.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.12620544433594, 507.701904296875, 438.67242431640625, 517.6649169921875], "page": 66, "span": [0, 74], "__ref_s3_data": null}], "text": "Figure 4-20 Query to display user profiles with function usage ID for RCAC", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.1628189086914, 474.81817626953125, 418.56524658203125, 488.2868957519531], "page": 66, "span": [0, 59], "__ref_s3_data": null}], "text": "4.3.3 Creating group profiles for the users and their roles", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.925048828125, 415.64404296875, 547.2724609375, 461.91180419921875], "page": 66, "span": [0, 307], "__ref_s3_data": null}], "text": "The next step is to create the different group profiles (ADMIN, CUSTOMER, TELLER, and DBE) and assign the different user profiles to the different group profiles. For a description of the different groups and users for this example, see 4.2, \"Description of the users roles and responsibilities\" on page 39.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.11614990234375, 393.6219787597656, 266.8606872558594, 404.0909118652344], "page": 66, "span": [0, 29], "__ref_s3_data": null}], "text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 364.826416015625, 535.997802734375, 387.3473205566406], "page": 66, "span": [0, 120], "__ref_s3_data": null}], "text": "1. 
On the main navigation pane of System i Navigator, right-click Groups and select New Group , as shown in Figure 4-21.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0914306640625, 191.87730407714844, 281.48504638671875, 201.3507080078125], "page": 66, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-21 Creating group profiles", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/47"}, {"prov": [{"bbox": [136.197265625, 685.2219848632812, 547.2084350585938, 721.3710327148438], "page": 67, "span": [0, 240], "__ref_s3_data": null}], "text": "2. The New Group window opens, as shown in Figure 4-22. For each new group, enter the Group name (ADMIN, CUSTOMER, TELLER, and DBE) and add the user profiles that are associated to this group by selecting the user profile and clicking Add .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.84339904785156, 670.1790771484375, 482.46234130859375, 680.8810424804688], "page": 67, "span": [0, 68], "__ref_s3_data": null}], "text": "Figure 4-22 shows adding user TQSPENCER to the TELLER group profile.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3377685546875, 418.9886779785156, 352.9278259277344, 428.8454895019531], "page": 67, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 4-22 Creating group profiles and adding users", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/48"}, {"prov": [{"bbox": [136.06529235839844, 380.3785400390625, 537.6182861328125, 402.5232849121094], "page": 67, "span": [0, 151], "__ref_s3_data": null}], "text": "3. 
After you create all the group profiles, you should see them listed in System i Navigator under Users and Groups \uf0ae Groups , as shown in Figure 4-23.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.22373962402344, 229.8401641845703, 304.0131530761719, 239.18055725097656], "page": 67, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 4-23 Newly created group profiles", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/49"}, {"prov": [{"bbox": [214.4501495361328, 27.94533920288086, 523.5935668945312, 37.25483703613281], "page": 67, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.784423828125, 27.93828010559082, 547.2591552734375, 37.451446533203125], "page": 67, "span": [0, 2], "__ref_s3_data": null}], "text": "51", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.25196838378906, 708.2222290039062, 420.2837219238281, 721.6193237304688], "page": 68, "span": [0, 52], "__ref_s3_data": null}], "text": "4.3.4 Creating the CUSTOMER_LOGIN_ID global variable", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.94598388671875, 660.9389038085938, 545.7725219726562, 695.162353515625], "page": 68, "span": [0, 244], "__ref_s3_data": null}], "text": "In this step, you create a global variable that is used to capture the Customer_Login_ID information, which is required to validate the permissions. 
For more information about global variables, see 3.2.2, \"Built-in global variables\" on page 19.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.34487915039062, 638.8994750976562, 266.8606872558594, 649.5125122070312], "page": 68, "span": [0, 29], "__ref_s3_data": null}], "text": "Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 610.179931640625, 536.1627197265625, 632.689453125], "page": 68, "span": [0, 145], "__ref_s3_data": null}], "text": "1. From System i Navigator, under the schema Bank_Schema, right-click Global Variable and select New \uf0ae Global Variable , as shown in Figure 4-24.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.28884887695312, 375.1119689941406, 292.1772766113281, 384.5009765625], "page": 68, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-24 Creating a global variable", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/50"}, {"prov": [{"bbox": [136.0622100830078, 312.438720703125, 541.1919555664062, 358.7922058105469], "page": 68, "span": [0, 314], "__ref_s3_data": null}], "text": "2. The New Global Variable window opens, as shown in Figure 4-25. Enter the global variable name of CUSTOMER_LOGIN_ID, select the data type of VARCHAR, and leave the default value of NULL. This default value ensures that users that do not use the web interface do not have permission to access the data. 
Click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.36365509033203, 70.82534790039062, 347.12200927734375, 81.17611694335938], "page": 68, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/51"}, {"prov": [{"bbox": [64.50041961669922, 27.93828010559082, 78.4020004272461, 37.69797897338867], "page": 68, "span": [0, 2], "__ref_s3_data": null}], "text": "52", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.74216079711914, 334.4244079589844, 37.310428619384766], "page": 68, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.37158203125, 686.9842529296875, 547.2542724609375, 721.395263671875], "page": 69, "span": [0, 216], "__ref_s3_data": null}], "text": "3. Now that the global variable is created, assign permissions to the variable so that it can be set by the program. Right-click the CUSTOMER_LOGIN_ID global variable and select Permissions , as shown in Figure 4-26.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.04354858398438, 540.4573364257812, 457.02374267578125, 550.0607299804688], "page": 69, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/52"}, {"prov": [{"bbox": [135.89535522460938, 501.52752685546875, 534.23876953125, 524.0010986328125], "page": 69, "span": [0, 147], "__ref_s3_data": null}], "text": "4. The Permissions window opens, as shown in Figure 4-27. 
Select Change authority for Webuser so that the application can set this global variable.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.090087890625, 203.91615295410156, 540.200439453125, 213.53598022460938], "page": 69, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/53"}, {"prov": [{"bbox": [214.47042846679688, 27.937862396240234, 523.5935668945312, 37.26201248168945], "page": 69, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.8599853515625, 27.93828010559082, 547.2591552734375, 37.51277542114258], "page": 69, "span": [0, 2], "__ref_s3_data": null}], "text": "53", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.25285339355469, 708.1456909179688, 339.9589538574219, 721.7003784179688], "page": 70, "span": [0, 43], "__ref_s3_data": null}], "text": "4.3.5 Defining and creating row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.81446838378906, 685.0341796875, 527.3794555664062, 695.3024291992188], "page": 70, "span": [0, 88], "__ref_s3_data": null}], "text": "You now ready to define the row permissions of the tables. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 643.4611206054688, 530.7598266601562, 680.3302612304688], "page": 70, "span": [0, 169], "__ref_s3_data": null}], "text": "1. 
From the navigation pane of System i Navigator, click Schemas \uf0ae BANK_SCHEMA , right-click Row Permissions , and select New \uf0ae Row Permission , as shown in Figure 4-28.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.02081298828125, 359.8767395019531, 313.7925720214844, 369.4545593261719], "page": 70, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-28 Selecting new row permissions", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/54"}, {"prov": [{"bbox": [64.4588623046875, 27.93828010559082, 78.4020004272461, 37.646766662597656], "page": 70, "span": [0, 2], "__ref_s3_data": null}], "text": "54", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39488220214844, 27.697824478149414, 334.4214172363281, 37.306575775146484], "page": 70, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.299072265625, 686.8775024414062, 544.505126953125, 721.486083984375], "page": 71, "span": [0, 213], "__ref_s3_data": null}], "text": "2. The New Row Permission window opens, as shown in Figure 4-29. Enter the information regarding the row permissions on the CUSTOMERS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.24830627441406, 658.2987060546875, 542.1815185546875, 680.8968505859375], "page": 71, "span": [0, 89], "__ref_s3_data": null}], "text": "-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.38929748535156, 581.2600708007812, 537.7831420898438, 651.4755249023438], "page": 71, "span": [0, 438], "__ref_s3_data": null}], "text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.33180236816406, 564.0100708007812, 381.3265380859375, 574.4090576171875], "page": 71, "span": [0, 51], "__ref_s3_data": null}], "text": "-Any other user profile cannot see any rows at all.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.9643096923828, 547.23583984375, 314.7688293457031, 557.5918579101562], "page": 71, "span": [0, 37], "__ref_s3_data": null}], "text": "Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.99803161621094, 252.98165893554688, 384.5369873046875, 262.90484619140625], "page": 71, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 4-29 New row permissions on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/55"}, {"prov": [{"bbox": [214.46267700195312, 27.962182998657227, 523.5935668945312, 37.22908020019531], "page": 71, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7837524414062, 27.93828010559082, 547.2591552734375, 37.501976013183594], "page": 71, "span": [0, 2], "__ref_s3_data": null}], "text": "55", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.47308349609375, 674.9223022460938, 543.8363647460938, 721.25634765625], "page": 72, "span": [0, 263], "__ref_s3_data": null}], "text": "3. Define the row permissions for the ACCOUNTS table. The New Row Permission window opens, as shown in Figure 4-30. Enter the information regarding the row permissions on the ACCOUNTS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.37701416015625, 646.2992553710938, 539.4539794921875, 668.8794555664062], "page": 72, "span": [0, 88], "__ref_s3_data": null}], "text": "-User profiles that belong to DBE, ADMIN and TELLER group profiles can see all the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.27256774902344, 569.2606201171875, 537.7576904296875, 639.7597045898438], "page": 72, "span": [0, 438], "__ref_s3_data": null}], "text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.14382934570312, 552.2808227539062, 381.32696533203125, 562.4356079101562], "page": 72, "span": [0, 51], "__ref_s3_data": null}], "text": "-Any other user profile cannot see any rows at all.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.93441772460938, 535.1266479492188, 314.7692565917969, 545.3858032226562], "page": 72, "span": [0, 37], "__ref_s3_data": null}], "text": "Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.46953582763672, 197.779296875, 305.92193603515625, 207.01873779296875], "page": 72, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 4-30 New row permissions on the ACCOUNTS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/56"}, {"prov": [{"bbox": [64.427490234375, 27.93828010559082, 78.4020004272461, 37.57078552246094], "page": 72, "span": [0, 2], "__ref_s3_data": null}], "text": "56", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.38003540039062, 27.699562072753906, 334.4214172363281, 37.30242156982422], "page": 72, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.15975952148438, 674.8258666992188, 529.9049072265625, 721.1845703125], "page": 73, "span": [0, 270], "__ref_s3_data": null}], "text": "4. Define the row permissions on the TRANSACTIONS table. The New Row Permission window opens, as shown in Figure 4-31. Enter the information regarding the row permissions on the TRANSACTIONS table. 
This row permission defines what is established in the following policy:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.35374450683594, 646.2988891601562, 547.229248046875, 668.8553466796875], "page": 73, "span": [0, 92], "__ref_s3_data": null}], "text": "-User profiles that belong to DBE, ADMIN, and TELLER group profiles can see all of the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.26327514648438, 569.26025390625, 537.7831420898438, 639.7720336914062], "page": 73, "span": [0, 438], "__ref_s3_data": null}], "text": "-User profiles that belong to the CUSTOMERS group profile (that is, the WEBUSER user) can see only the rows that match their customer login ID. The login ID value representing the online banking user is passed from the web application to the database by using the global variable CUSTOMER_LOGIN_ID. The permission rule uses a subquery to check whether the global variable matches the CUSTOMER_LOGIN_ID column value in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [170.82107543945312, 505.298828125, 533.3919677734375, 551.484375], "page": 73, "span": [0, 285], "__ref_s3_data": null}], "text": "Note: You must join back to ACCOUNTS and then to CUSTOMERS by using a subquery to check whether the global variable matches CUSTOMER_LOGIN_ID. Also, if the row permission or column mask rule text references another table with RCAC defined, the RCAC for the referenced table is ignored.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.84132385253906, 459.28094482421875, 381.3265380859375, 485.8647155761719], "page": 73, "span": [0, 89], "__ref_s3_data": null}], "text": "-Any other user profile cannot see any rows at all. Select the Enabled option. 
Click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.39651489257812, 68.98163604736328, 325.65087890625, 78.23909759521484], "page": 73, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-31 New row permissions on the TRANSACTIONS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/57"}, {"prov": [{"bbox": [214.43179321289062, 28.01248550415039, 523.5935668945312, 37.145240783691406], "page": 73, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.70849609375, 27.93828010559082, 547.2591552734375, 37.42893600463867], "page": 73, "span": [0, 2], "__ref_s3_data": null}], "text": "57", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.43508911132812, 27.93828010559082, 78.4020004272461, 37.60900115966797], "page": 74, "span": [0, 2], "__ref_s3_data": null}], "text": "58", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.694499969482422, 334.4214172363281, 37.342376708984375], "page": 74, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.46478271484375, 687.2786865234375, 521.1912231445312, 721.3973999023438], "page": 74, "span": [0, 175], "__ref_s3_data": null}], "text": "5. To verify that the row permissions are enabled, from System i Navigator, click Row Permissions , as shown in Figure 4-32. 
The three row permissions are created and enabled.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.27345275878906, 507.8507385253906, 293.2009582519531, 517.7713012695312], "page": 74, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 4-32 List of row permissions on BANK_SCHEMA", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/58"}, {"prov": [{"bbox": [64.18543243408203, 474.9947814941406, 327.4058837890625, 488.12005615234375], "page": 74, "span": [0, 40], "__ref_s3_data": null}], "text": "4.3.6 Defining and creating column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.18695068359375, 451.67236328125, 479.4200134277344, 461.9885559082031], "page": 74, "span": [0, 76], "__ref_s3_data": null}], "text": "This section defines the masks on the columns. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.79998779296875, 411.03912353515625, 523.0706787109375, 447.3500061035156], "page": 74, "span": [0, 168], "__ref_s3_data": null}], "text": "1. From the main navigation pane of System i Navigator, click Schemas \uf0ae BANK_SCHEMA , right-click Column Masks , and select New \uf0ae Column Mask , as shown in Figure 4-33.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.06741333007812, 210.88377380371094, 288.04827880859375, 220.59603881835938], "page": 74, "span": [0, 34], "__ref_s3_data": null}], "text": "Figure 4-33 Creating a column mask", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/59"}, {"prov": [{"bbox": [136.38919067382812, 699.2781372070312, 524.1021728515625, 721.3534545898438], "page": 75, "span": [0, 97], "__ref_s3_data": null}], "text": "2. 
In the New Column Mask window, which is shown in Figure 4-34, enter the following information:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.30136108398438, 682.29833984375, 465.4696044921875, 692.8587646484375], "page": 75, "span": [0, 63], "__ref_s3_data": null}], "text": "-Select the CUSTOMERS table on which to create the column mask.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.126708984375, 670.2985229492188, 475.1905212402344, 680.7498168945312], "page": 75, "span": [0, 66], "__ref_s3_data": null}], "text": "-Select the Column to mask; in this example, it is CUSTOMER_EMAIL.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.1351318359375, 634.299072265625, 531.3062133789062, 668.7997436523438], "page": 75, "span": [0, 207], "__ref_s3_data": null}], "text": "-Define the masking logic depending on the rules that you want to enforce. In this example, either the ADMIN or CUSTOMER group profiles can see the entire email address; otherwise, it is masked to ****@****.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.8079833984375, 617.1079711914062, 314.766845703125, 627.5975952148438], "page": 75, "span": [0, 37], "__ref_s3_data": null}], "text": "Select the Enabled option. Click OK .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.46917724609375, 177.73121643066406, 395.4768981933594, 187.4214630126953], "page": 75, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-34 Defining a column mask on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/60"}, {"prov": [{"bbox": [136.0830841064453, 151.05543518066406, 522.032958984375, 161.2145538330078], "page": 75, "span": [0, 84], "__ref_s3_data": null}], "text": "3. 
Repeat steps 1 on page 58 and 2 to create column masks for the following columns:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.3282928466797, 134.0785675048828, 381.9765319824219, 144.7090301513672], "page": 75, "span": [0, 34], "__ref_s3_data": null}], "text": "-MASK_DRIVERS_LICENSE_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.311767578125, 122.0787582397461, 335.7012023925781, 132.44134521484375], "page": 75, "span": [0, 27], "__ref_s3_data": null}], "text": "-MASK_LOGIN_ID_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.41925048828125, 109.89730834960938, 446.63970947265625, 120.5661392211914], "page": 75, "span": [0, 43], "__ref_s3_data": null}], "text": "-MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.36715698242188, 98.07913970947266, 379.42840576171875, 108.87069702148438], "page": 75, "span": [0, 32], "__ref_s3_data": null}], "text": "-MASK_ACCOUNT_NUMBER_ON_ACCOUNTS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.2811737060547, 86.05474090576172, 397.17034912109375, 96.75386047363281], "page": 75, "span": [0, 36], "__ref_s3_data": null}], "text": "-MASK_SECURITY_QUESTION_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.15771484375, 74.07952117919922, 322.7781066894531, 84.470703125], "page": 75, "span": [0, 25], "__ref_s3_data": null}], "text": "-MASK_TAX_ID_ON_CUSTOMERS", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [214.41281127929688, 27.987712860107422, 523.5935668945312, 37.20513916015625], "page": 75, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.8348999023438, 27.93828010559082, 547.2591552734375, 37.66927719116211], "page": 75, "span": [0, 2], "__ref_s3_data": null}], "text": "59", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.83897399902344, 698.9580688476562, 525.707275390625, 721.4640502929688], "page": 76, "span": [0, 166], "__ref_s3_data": null}], "text": "4. To verify that the column masks are enabled, from System i Navigator, click Column Masks , as shown in Figure 4-35. The seven column masks are created and enabled.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.25138854980469, 599.3970336914062, 285.23284912109375, 608.34912109375], "page": 76, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-35 List of column masks on BANK_SCHEMA", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/61"}, {"prov": [{"bbox": [64.04530334472656, 566.3858032226562, 433.7906494140625, 579.4189453125], "page": 76, "span": [0, 59], "__ref_s3_data": null}], "text": "4.3.7 Restricting the inserting and updating of masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.80776977539062, 506.8577575683594, 544.9268798828125, 553.2254638671875], "page": 76, "span": [0, 270], "__ref_s3_data": null}], "text": "This step defines the check constraints that support the column masks to make sure that on INSERTS or UPDATES, data is not written with a masked value. 
For more information about the propagation of masked data, see 6.8, \"Avoiding propagation of masked data\" on page 108.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.28085327148438, 484.9208679199219, 266.8606872558594, 495.3861083984375], "page": 76, "span": [0, 29], "__ref_s3_data": null}], "text": "Complete the following steps:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.7621612548828, 444.3561096191406, 547.1956787109375, 478.94757080078125], "page": 76, "span": [0, 206], "__ref_s3_data": null}], "text": "1. Create a check constraint on the column CUSTOMER_EMAIL in the CUSTOMERS table. From the navigation pane of System i Navigator, right-click the CUSTOMERS table and select Definition , as shown Figure 4-36", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.76719665527344, 311.3090515136719, 334.2453308105469, 320.839111328125], "page": 76, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-36 Definition of the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/62"}, {"prov": [{"bbox": [135.84291076660156, 272.2760314941406, 547.7396240234375, 294.8913879394531], "page": 76, "span": [0, 113], "__ref_s3_data": null}], "text": "2. 
From the CUSTOMERS definition window, click the Check Constraints tab and click Add , as shown in Figure 4-37.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.62763977050781, 179.6382293701172, 221.8394317626953, 189.23858642578125], "page": 76, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-37 Adding a check constraint", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/63"}, {"prov": [{"bbox": [64.37886810302734, 27.93828010559082, 78.4020004272461, 37.67101287841797], "page": 76, "span": [0, 2], "__ref_s3_data": null}], "text": "60", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39566802978516, 27.68936538696289, 334.4214172363281, 37.33694076538086], "page": 76, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.3705596923828, 698.8788452148438, 515.8154907226562, 721.3765869140625], "page": 77, "span": [0, 96], "__ref_s3_data": null}], "text": "3. The New Check Constraint window opens, as shown in Figure 4-38. Complete the following steps:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.74546813964844, 682.29833984375, 344.06817626953125, 692.7590942382812], "page": 77, "span": [0, 36], "__ref_s3_data": null}], "text": "a. Select the CUSTOMER_EMAIL column.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.6258544921875, 653.2589721679688, 541.599853515625, 675.6708984375], "page": 77, "span": [0, 137], "__ref_s3_data": null}], "text": "b. Enter the check constraint condition. 
In this example, specify CUSTOMER_EMAIL to be different from ****@****, which is the mask value.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.5522003173828, 636.2342529296875, 511.9270324707031, 646.7601928710938], "page": 77, "span": [0, 78], "__ref_s3_data": null}], "text": "c. Select the On update violation, preserve column value option and click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.45979309082031, 129.19110107421875, 362.47125244140625, 138.60385131835938], "page": 77, "span": [0, 68], "__ref_s3_data": null}], "text": "Figure 4-38 Specifying a new check constraint on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/64"}, {"prov": [{"bbox": [214.4756317138672, 27.958784103393555, 523.5935668945312, 37.23066711425781], "page": 77, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7523803710938, 27.93828010559082, 547.2591552734375, 37.49419403076172], "page": 77, "span": [0, 2], "__ref_s3_data": null}], "text": "61", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.7649688720703, 698.927978515625, 535.5555419921875, 721.4752197265625], "page": 78, "span": [0, 159], "__ref_s3_data": null}], "text": "4. 
Figure 4-39 shows that there is now a check constraint on the CUSTOMERS table that prevents any masked data from being updated to the CUSTOMER_EMAIL column.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.41434478759766, 395.8761901855469, 294.2294006347656, 405.31463623046875], "page": 78, "span": [0, 51], "__ref_s3_data": null}], "text": "Figure 4-39 Check constraint on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/65"}, {"prov": [{"bbox": [136.1330108642578, 344.68316650390625, 547.2733154296875, 379.44927978515625], "page": 78, "span": [0, 201], "__ref_s3_data": null}], "text": "5. Create all the other check constraints that are associated to each of the masks on the CUSTOMERS table. After this is done, these constraints should look like the ones that are shown in Figure 4-40.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.48959350585938, 179.8015899658203, 323.02691650390625, 189.43350219726562], "page": 78, "span": [0, 60], "__ref_s3_data": null}], "text": "Figure 4-40 List of check constraints on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/66"}, {"prov": [{"bbox": [64.48886108398438, 27.93828010559082, 78.4020004272461, 37.6668586730957], "page": 78, "span": [0, 2], "__ref_s3_data": null}], "text": "62", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.68883514404297, 334.4214172363281, 37.32878112792969], "page": 78, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.339111328125, 708.2562866210938, 360.40594482421875, 721.5789184570312], "page": 79, "span": [0, 46], "__ref_s3_data": null}], "text": "4.3.8 Activating row and 
column access control", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.94613647460938, 673.1777954101562, 516.2232055664062, 695.4234008789062], "page": 79, "span": [0, 101], "__ref_s3_data": null}], "text": "You are now ready to activate RCAC on all three tables in this example. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 631.5825805664062, 542.7099609375, 666.0606689453125], "page": 79, "span": [0, 239], "__ref_s3_data": null}], "text": "1. Start by enabling RCAC on the CUSTOMERS table. From System i Navigator, right-click the CUSTOMERS table and select Definition . As shown in Figure 4-41, make sure that you select Row access control and Column access control . Click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.31451416015625, 447.11505126953125, 361.9471130371094, 456.8612365722656], "page": 79, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-41 Enabling RCAC on the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/67"}, {"prov": [{"bbox": [136.09698486328125, 396.5186462402344, 537.477783203125, 430.6976623535156], "page": 79, "span": [0, 198], "__ref_s3_data": null}], "text": "2. Enable RCAC on the ACCOUNTS table. Right-click the ACCOUNTS table and select Definition . As shown Figure 4-42, make sure that you select Row access control and Column access control . 
Click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.55694580078125, 206.38462829589844, 318.4848937988281, 215.95945739746094], "page": 79, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-42 Enabling RCAC on ACCOUNTS", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/68"}, {"prov": [{"bbox": [214.43182373046875, 27.920791625976562, 523.5935668945312, 37.256046295166016], "page": 79, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7835693359375, 27.93828010559082, 547.2591552734375, 37.51277542114258], "page": 79, "span": [0, 2], "__ref_s3_data": null}], "text": "63", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.34092712402344, 687.2786865234375, 544.6798706054688, 721.5467529296875], "page": 80, "span": [0, 183], "__ref_s3_data": null}], "text": "3. Enable RCAC on the TRANSACTIONS table. Right-click the TRANSACTIONS table and select Definition . As shown in Figure 4-43, make sure that you select Row access control . 
Click OK .", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.18115234375, 502.9380187988281, 338.5848083496094, 512.6725463867188], "page": 80, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-43 Enabling RCAC on TRANSACTIONS", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/69"}, {"prov": [{"bbox": [64.24539184570312, 470.9652404785156, 271.11932373046875, 484.2687072753906], "page": 80, "span": [0, 31], "__ref_s3_data": null}], "text": "4.3.9 Reviewing row permissions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.09507751464844, 435.4518737792969, 535.83544921875, 458.0428771972656], "page": 80, "span": [0, 96], "__ref_s3_data": null}], "text": "This section displays all the row permissions after enabling RCAC. Complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80099487304688, 394.49609375, 533.23095703125, 428.60400390625], "page": 80, "span": [0, 174], "__ref_s3_data": null}], "text": "1. From System i Navigator, click Row Permissions , as shown in Figure 4-44. Three additional Row Permissions are added (QIBM_DEFAULT*). 
There is one per each row permission.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.4681396484375, 198.6104736328125, 271.9492492675781, 208.03958129882812], "page": 80, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-44 Row permissions after enabling RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/70"}, {"prov": [{"bbox": [64.39739990234375, 27.93828010559082, 78.4020004272461, 37.68030548095703], "page": 80, "span": [0, 2], "__ref_s3_data": null}], "text": "64", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39335632324219, 27.674259185791016, 334.4214172363281, 37.33641052246094], "page": 80, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.2726593017578, 698.9834594726562, 544.3787231445312, 721.4147338867188], "page": 81, "span": [0, 121], "__ref_s3_data": null}], "text": "2. Look at one of the row permission definitions by right-clicking it and selecting Definition , as shown in Figure 4-45.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.10995483398438, 541.09619140625, 328.74713134765625, 550.6611938476562], "page": 81, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-45 Selecting row permission definition", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/71"}, {"prov": [{"bbox": [136.38052368164062, 478.2933044433594, 546.0803833007812, 524.6795043945312], "page": 81, "span": [0, 344], "__ref_s3_data": null}], "text": "3. A window opens, as shown in Figure 4-46. Take note of the nonsensical search condition (0=1) of the QIBM_DEFAULT row permission. 
This permission is ORed with all of the others and it ensures that if someone does not meet any of the criteria from the row permission then this condition is tested, and because it is false the access is denied.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.46764373779297, 184.5873565673828, 343.1969299316406, 193.9542236328125], "page": 81, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-46 Search condition of the QIBM_DEFAULT row permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/72"}, {"prov": [{"bbox": [214.4531707763672, 27.92449188232422, 523.5935668945312, 37.23784255981445], "page": 81, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7138671875, 27.93828010559082, 547.2591552734375, 37.5822639465332], "page": 81, "span": [0, 2], "__ref_s3_data": null}], "text": "65", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.32689666748047, 708.279541015625, 347.13360595703125, 721.6195068359375], "page": 82, "span": [0, 42], "__ref_s3_data": null}], "text": "4.3.10 Demonstrating data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.72265625, 672.728515625, 547.2556762695312, 695.5126953125], "page": 82, "span": [0, 148], "__ref_s3_data": null}], "text": "You are now ready to test the RCAC definitions. 
Run the following SQL statements with each type of user (DBE, SECURITY, TELLER, ADMIN, and WEBUSER):", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.75660705566406, 656.2430419921875, 390.8248596191406, 666.4733276367188], "page": 82, "span": [0, 65], "__ref_s3_data": null}], "text": "GLYPH A SELECT statement that returns the SESSION_USER.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.362548828125, 627.279296875, 543.1919555664062, 649.4446411132812], "page": 82, "span": [0, 131], "__ref_s3_data": null}], "text": "GLYPH A SELECT statement that counts the customers from the CUSTOMER table. There are 90 customers in the CUSTOMER table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.49046325683594, 598.1434326171875, 543.6400756835938, 620.7059326171875], "page": 82, "span": [0, 126], "__ref_s3_data": null}], "text": "GLYPH A simple SELECT statement that returns the following output from the CUSTOMERS table ordered by customer_name:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.32005310058594, 581.2601318359375, 227.99673461914062, 590.979736328125], "page": 82, "span": [0, 22], "__ref_s3_data": null}], "text": "-c u s t o m e r _ i d", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.21688842773438, 569.2603149414062, 237.2466278076172, 578.581298828125], "page": 82, "span": [0, 14], "__ref_s3_data": null}], "text": "-customer_name", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.34410095214844, 557.260498046875, 236.2086181640625, 566.9081420898438], "page": 82, "span": [0, 15], "__ref_s3_data": null}], "text": "-customer_email", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.4298553466797, 545.2606811523438, 246.89581298828125, 554.9027099609375], "page": 82, "span": [0, 30], "__ref_s3_data": null}], "text": "-c u s t o m e r _ t 
a x _ i d", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.24029541015625, 533.2608642578125, 318.4402160644531, 543.2066040039062], "page": 82, "span": [0, 32], "__ref_s3_data": null}], "text": "-customer_drivers_license_number", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.72146606445312, 505.5359802246094, 357.2575378417969, 517.6439819335938], "page": 82, "span": [0, 36], "__ref_s3_data": null}], "text": "Data access for a DBE user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.34576416015625, 492.2123107910156, 394.5498352050781, 502.6340026855469], "page": 82, "span": [0, 57], "__ref_s3_data": null}], "text": "To test a DBE (MCAIN) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 462.9244689941406, 531.4830322265625, 485.1828308105469], "page": 82, "span": [0, 152], "__ref_s3_data": null}], "text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-47. In this example, MCAIN is the DBE user.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.22366333007812, 341.9969787597656, 263.2569580078125, 351.207763671875], "page": 82, "span": [0, 28], "__ref_s3_data": null}], "text": "Figure 4-47 DBE session user", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.00750732421875, 314.7911682128906, 503.1596374511719, 325.3352966308594], "page": 82, "span": [0, 78], "__ref_s3_data": null}], "text": "2. 
The number of rows that the DBE user MCAIN can see is shown in Figure 4-48.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.29940795898438, 155.87709045410156, 452.1958312988281, 165.4332275390625], "page": 82, "span": [0, 71], "__ref_s3_data": null}], "text": "Figure 4-48 Number of rows that DBE user can see in the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.38954162597656, 27.93828010559082, 78.4020004272461, 37.51285171508789], "page": 82, "span": [0, 2], "__ref_s3_data": null}], "text": "66", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.38802337646484, 27.724411010742188, 334.4214172363281, 37.31571578979492], "page": 82, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.4130859375, 687.2783203125, 547.291015625, 721.2689819335938], "page": 83, "span": [0, 201], "__ref_s3_data": null}], "text": "3. The result of the third SQL statement is shown in Figure 4-49. Note the masked columns. 
User MCAIN can see all the rows in the CUSTOMERS table, but there are some columns where the result is masked.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.25863647460938, 312.47698974609375, 376.5732727050781, 321.8323059082031], "page": 83, "span": [0, 73], "__ref_s3_data": null}], "text": "Figure 4-49 SQL statement that is run by the DBE user with masked columns", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.4450225830078, 283.8960876464844, 382.60321044921875, 296.25677490234375], "page": 83, "span": [0, 39], "__ref_s3_data": null}], "text": "Data access for SECURITY user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.2757110595703, 270.2940368652344, 382.8707580566406, 280.9367370605469], "page": 83, "span": [0, 54], "__ref_s3_data": null}], "text": "To test a SECURITY user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 241.54782104492188, 531.4830322265625, 263.9823303222656], "page": 83, "span": [0, 163], "__ref_s3_data": null}], "text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-50. In this example, SECURITY is the security officer.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.2332305908203, 95.13304138183594, 289.78204345703125, 104.47471618652344], "page": 83, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 4-50 SECURITY session user", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/73"}, {"prov": [{"bbox": [214.4011688232422, 27.97011375427246, 523.5935668945312, 37.246681213378906], "page": 83, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7114868164062, 27.93828010559082, 547.2591552734375, 37.56051254272461], "page": 83, "span": [0, 2], "__ref_s3_data": null}], "text": "67", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.40093231201172, 27.93828010559082, 78.4020004272461, 37.56172180175781], "page": 84, "span": [0, 2], "__ref_s3_data": null}], "text": "68", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.33565521240234, 27.722749710083008, 334.4214172363281, 37.3211555480957], "page": 84, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.1411895751953, 698.8203735351562, 547.1937866210938, 721.4033813476562], "page": 84, "span": [0, 152], "__ref_s3_data": null}], "text": "2. The number of rows in the CUSTOMERS table that the security officer can see is shown in Figure 4-51. The security officer cannot see any data at all.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.25540161132812, 554.5038452148438, 487.09649658203125, 563.9228515625], "page": 84, "span": [0, 83], "__ref_s3_data": null}], "text": "Figure 4-51 Number of rows that the security officer can see in the CUSTOMERS table", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8675537109375, 515.6644287109375, 542.7387084960938, 538.0418090820312], "page": 84, "span": [0, 126], "__ref_s3_data": null}], "text": "3. The result of the third SQL statement is shown in Figure 4-52. 
Note the empty set that is returned to the security officer.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.28270721435547, 341.8423767089844, 362.2696838378906, 351.24163818359375], "page": 84, "span": [0, 71], "__ref_s3_data": null}], "text": "Figure 4-52 SQL statement that is run by the SECURITY user - no results", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/74"}, {"prov": [{"bbox": [136.57626342773438, 313.4159851074219, 368.6448059082031, 325.6100769042969], "page": 84, "span": [0, 37], "__ref_s3_data": null}], "text": "Data access for TELLER user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.22488403320312, 299.5154113769531, 427.7564697265625, 310.1946716308594], "page": 84, "span": [0, 64], "__ref_s3_data": null}], "text": "To test a Teller (TQSPENCER) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.79998779296875, 271.1532897949219, 530.67822265625, 293.5227355957031], "page": 84, "span": [0, 164], "__ref_s3_data": null}], "text": "1. Confirm that the TELLER user is the user of the session by running the first SQL statement, as shown in Figure 4-53. In this example, TQSPENCER is a TELLER user.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.19703674316406, 110.5588150024414, 278.4291076660156, 119.93122863769531], "page": 84, "span": [0, 31], "__ref_s3_data": null}], "text": "Figure 4-53 TELLER session user", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.14108276367188, 699.0534057617188, 547.2401733398438, 721.4407958984375], "page": 85, "span": [0, 136], "__ref_s3_data": null}], "text": "2. The number of rows in the CUSTOMERS table that the TELLER user can see is shown in Figure 4-54. 
The TELLER user can see all the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3040771484375, 561.177001953125, 482.8044738769531, 570.7412719726562], "page": 85, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/75"}, {"prov": [{"bbox": [136.166259765625, 510.33868408203125, 547.2914428710938, 544.6939086914062], "page": 85, "span": [0, 195], "__ref_s3_data": null}], "text": "3. The result of the third SQL statement is shown in Figure 4-55. Note the masked columns. The TELLER user, TQSPENSER, can see all the rows, but there are some columns where the result is masked.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.32777404785156, 110.41401672363281, 392.1126403808594, 119.69708251953125], "page": 85, "span": [0, 76], "__ref_s3_data": null}], "text": "Figure 4-55 SQL statement that is run by the TELLER user with masked columns", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [214.44871520996094, 27.97879981994629, 523.5935668945312, 37.242225646972656], "page": 85, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7135620117188, 27.93828010559082, 547.2591552734375, 37.74858474731445], "page": 85, "span": [0, 2], "__ref_s3_data": null}], "text": "69", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.07074737548828, 27.93828010559082, 78.4020004272461, 37.85115432739258], "page": 86, "span": [0, 2], "__ref_s3_data": null}], "text": "70", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.40281677246094, 27.719274520874023, 334.4216003417969, 37.32561111450195], "page": 86, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.5863037109375, 709.5360107421875, 361.28759765625, 721.4993286132812], "page": 86, "span": [0, 36], "__ref_s3_data": null}], "text": "Data access for ADMIN user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.27212524414062, 696.2108154296875, 448.129638671875, 707.0377197265625], "page": 86, "span": [0, 65], "__ref_s3_data": null}], "text": "To test an ADMIN (VGLUCCHESS) user, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 667.0796508789062, 547.238525390625, 689.3929443359375], "page": 86, "span": [0, 164], "__ref_s3_data": null}], "text": "1. Confirm that the ADMIN user is the user of the session by running the first SQL statement, as shown in Figure 4-56. 
In this example, VGLUCCHESS is an ADMIN user.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.30274963378906, 557.8770141601562, 274.6385803222656, 567.1309814453125], "page": 86, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 4-56 ADMIN session user", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/76"}, {"prov": [{"bbox": [136.12391662597656, 519.0384521484375, 537.4520263671875, 541.3179931640625], "page": 86, "span": [0, 111], "__ref_s3_data": null}], "text": "2. The number of rows that the ADMIN user can see is shown in Figure 4-57. The ADMIN user can see all the rows.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0926971435547, 411.29229736328125, 457.6079406738281, 420.69305419921875], "page": 86, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-57 Number of rows that the ADMIN can see in the CUSTOMERS table", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.28590393066406, 699.2781372070312, 524.1978759765625, 721.4224853515625], "page": 87, "span": [0, 94], "__ref_s3_data": null}], "text": "3. The result of the third SQL statement is shown in Figure 4-58. 
There are no masked columns.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.31376647949219, 303.7611083984375, 386.8026123046875, 313.2142028808594], "page": 87, "span": [0, 75], "__ref_s3_data": null}], "text": "Figure 4-58 SQL statement that is run by the ADMIN user - no masked columns", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.420166015625, 275.79608154296875, 383.07720947265625, 287.52459716796875], "page": 87, "span": [0, 38], "__ref_s3_data": null}], "text": "Data access for WEBUSER user with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.1792449951172, 250.30612182617188, 527.5846557617188, 272.6474304199219], "page": 87, "span": [0, 121], "__ref_s3_data": null}], "text": "To test a CUSTOMERS (WEBUSER) user that accesses the database by using the web application, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80001831054688, 220.9036102294922, 531.4830322265625, 243.4668731689453], "page": 87, "span": [0, 157], "__ref_s3_data": null}], "text": "1. Confirm that the user is the user of the session by running the first SQL statement, as shown in Figure 4-59. In this example, WEBUSER is a CUSTOMER user.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.7146453857422, 106.48837280273438, 289.7508239746094, 115.76629638671875], "page": 87, "span": [0, 32], "__ref_s3_data": null}], "text": "Figure 4-59 WEBUSER session user", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/77"}, {"prov": [{"bbox": [214.3849639892578, 27.97275733947754, 523.5935668945312, 37.253326416015625], "page": 87, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.3804931640625, 27.93828010559082, 547.2591552734375, 37.64752197265625], "page": 87, "span": [0, 2], "__ref_s3_data": null}], "text": "71", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.16098022460938, 27.93828010559082, 78.4020004272461, 37.712100982666016], "page": 88, "span": [0, 2], "__ref_s3_data": null}], "text": "72", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.71753692626953, 334.4482116699219, 37.33603286743164], "page": 88, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.35842895507812, 686.8118286132812, 547.2925415039062, 721.3489379882812], "page": 88, "span": [0, 195], "__ref_s3_data": null}], "text": "2. A global variable (CUSTOMER_LOGIN_ID) is set by the web application and then is used to check the row permissions. Figure 4-60 shows setting the global variable by using the customer login ID.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.25631713867188, 559.1651000976562, 394.8086242675781, 568.734130859375], "page": 88, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/78"}, {"prov": [{"bbox": [136.11886596679688, 520.3414916992188, 514.379638671875, 542.4933471679688], "page": 88, "span": [0, 127], "__ref_s3_data": null}], "text": "3. 
Verify that the global variable was set with the correct value by clicking the Global Variable tab, as shown in Figure 4-61.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.23065185546875, 261.1766357421875, 320.9690246582031, 270.5703430175781], "page": 88, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-61 Viewing the global variable value", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/79"}, {"prov": [{"bbox": [135.91624450683594, 222.26995849609375, 541.2606811523438, 244.6071319580078], "page": 88, "span": [0, 145], "__ref_s3_data": null}], "text": "4. The number of rows that the WEBUSER can see is shown in Figure 4-62. This user can see only the one row that belongs to his web-based user ID.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.35362243652344, 96.42333221435547, 474.386962890625, 106.09137725830078], "page": 88, "span": [0, 74], "__ref_s3_data": null}], "text": "Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/80"}, {"prov": [{"bbox": [136.2490234375, 699.19580078125, 524.1978759765625, 721.304931640625], "page": 89, "span": [0, 158], "__ref_s3_data": null}], "text": "5. The result of the third SQL statement is shown in Figure 4-63. 
There are no masked columns, and the user can see only one row, which is the user's own row.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.12274932861328, 529.7809448242188, 368.16168212890625, 539.2711791992188], "page": 89, "span": [0, 68], "__ref_s3_data": null}], "text": "Figure 4-63 SQL statement that is run by WEBUSER - no masked columns", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.15013122558594, 501.2431640625, 377.4743957519531, 513.6253662109375], "page": 89, "span": [0, 39], "__ref_s3_data": null}], "text": "Other examples of data access with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9036865234375, 475.62322998046875, 512.9512939453125, 497.89947509765625], "page": 89, "span": [0, 114], "__ref_s3_data": null}], "text": "To run an SQL statement that lists all the accounts and current balance by customer, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 435.0992736816406, 535.2608642578125, 469.3912353515625], "page": 89, "span": [0, 167], "__ref_s3_data": null}], "text": "1. Run the SQL statement that is shown in Figure 4-64 using the WEBUSER user profile. The SQL statement has no WHERE clause, but the WEBUSER can see only his accounts.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.2671356201172, 238.4688262939453, 520.2247314453125, 247.89683532714844], "page": 89, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 4-64 List of accounts and current balance by customer using the WEBUSER user profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [214.42453002929688, 27.92449188232422, 523.5935668945312, 37.203250885009766], "page": 89, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6150512695312, 27.93828010559082, 547.2591552734375, 37.539588928222656], "page": 89, "span": [0, 2], "__ref_s3_data": null}], "text": "73", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.05266571044922, 27.93828010559082, 78.4020004272461, 37.82094192504883], "page": 90, "span": [0, 2], "__ref_s3_data": null}], "text": "74", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41647338867188, 27.65258026123047, 334.4214172363281, 37.339962005615234], "page": 90, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.34283447265625, 675.2788696289062, 547.2566528320312, 721.446044921875], "page": 90, "span": [0, 261], "__ref_s3_data": null}], "text": "2. Figure 4-65 shows running a more complex SQL statement that calculates transaction total by account for year and quarter. Run this statement using the WEBUSER profile. The SQL statement has no WHERE clause, but the WEBUSER user can see only his transactions.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.00137329101562, 281.208740234375, 534.98046875, 290.7270812988281], "page": 90, "span": [0, 97], "__ref_s3_data": null}], "text": "Figure 4-65 Calculate transaction total by account for year and quarter using the WEBUSER profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.26443481445312, 687.2783203125, 547.2400512695312, 721.2547607421875], "page": 91, "span": [0, 236], "__ref_s3_data": null}], "text": "3. Run the same SQL statement that lists the accounts and current balance by customer, but use a TELLER user profile. The result of this SQL statement is shown in Figure 4-66. 
The TELLER user can see all the rows in the CUSTOMERS table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.93475341796875, 289.7537841796875, 501.49462890625, 299.6206970214844], "page": 91, "span": [0, 88], "__ref_s3_data": null}], "text": "Figure 4-66 List of accounts and current balance by customer using a TELLER user profile", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [64.13478088378906, 257.6565856933594, 375.0662841796875, 270.7911071777344], "page": 91, "span": [0, 47], "__ref_s3_data": null}], "text": "4.3.11 Query implementation with RCAC activated", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.62245178222656, 198.10879516601562, 547.1669311523438, 244.65867614746094], "page": 91, "span": [0, 280], "__ref_s3_data": null}], "text": "This section looks at some other interesting information that is related to RCAC by comparing the access plans of the same SQL statement without RCAC and with RCAC. This example uses Visual Explain and runs an SQL statement that lists the accounts and current balance by customer.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [214.41653442382812, 27.961427688598633, 523.5935668945312, 37.21397399902344], "page": 91, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. 
Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5612182617188, 27.93828010559082, 547.2591552734375, 37.648582458496094], "page": 91, "span": [0, 2], "__ref_s3_data": null}], "text": "75", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.971797943115234, 27.93828010559082, 78.4020004272461, 37.69707107543945], "page": 92, "span": [0, 2], "__ref_s3_data": null}], "text": "76", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.41017150878906, 27.720333099365234, 334.4214172363281, 37.31072998046875], "page": 92, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.24771118164062, 710.7789306640625, 266.8606872558594, 721.4140625], "page": 92, "span": [0, 29], "__ref_s3_data": null}], "text": "Complete the following steps:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.8000030517578, 670.173583984375, 532.4749755859375, 704.4243774414062], "page": 92, "span": [0, 183], "__ref_s3_data": null}], "text": "1. Figure 4-67 shows the SQL statement in Visual Explain ran with no RCAC. 
The implementation of the SQL statement is a two-way join, which is exactly what the SQL statement is doing.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.00106811523438, 281.8343505859375, 340.9333801269531, 291.2391052246094], "page": 92, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-67 Visual Explain with no RCAC enabled", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/81"}, {"prov": [{"bbox": [136.2415771484375, 686.9530639648438, 514.048583984375, 721.3856811523438], "page": 93, "span": [0, 228], "__ref_s3_data": null}], "text": "2. Figure 4-68 shows the Visual Explain of the same SQL statement, but with RCAC enabled. It is clear that the implementation of the SQL statement is more complex because the row permission rule becomes part of the WHERE clause.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.0782928466797, 302.49420166015625, 328.2711486816406, 312.02569580078125], "page": 93, "span": [0, 44], "__ref_s3_data": null}], "text": "Figure 4-68 Visual Explain with RCAC enabled", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/82"}, {"prov": [{"bbox": [136.2212677001953, 251.74862670898438, 547.2394409179688, 286.0174865722656], "page": 93, "span": [0, 232], "__ref_s3_data": null}], "text": "3. Compare the advised indexes that are provided by the Optimizer without RCAC and with RCAC enabled. Figure 4-69 shows the index advice for the SQL statement without RCAC enabled. 
The index being advised is for the ORDER BY clause.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.5755615234375, 115.8077621459961, 227.5344696044922, 125.41432189941406], "page": 93, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-69 Index advice with no RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/83"}, {"prov": [{"bbox": [214.50466918945312, 27.953571319580078, 523.5935668945312, 37.28648376464844], "page": 93, "span": [0, 70], "__ref_s3_data": null}], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.4413452148438, 27.93828010559082, 547.36328125, 37.53264236450195], "page": 93, "span": [0, 2], "__ref_s3_data": null}], "text": "77", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.06967163085938, 687.2786865234375, 547.188720703125, 721.38037109375], "page": 94, "span": [0, 232], "__ref_s3_data": null}], "text": "4. Now, look at the advised indexes with RCAC enabled. As shown in Figure 4-70, there is an additional index being advised, which is basically for the row permission rule. 
For more information, see 6.4.2, \"Index advisor\" on page 99.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.43022918701172, 544.4075927734375, 250.1027374267578, 553.8165893554688], "page": 94, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 4-70 Index advice with RCAC enabled", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/84"}, {"prov": [{"bbox": [64.0842056274414, 27.93828010559082, 78.4020004272461, 37.74510955810547], "page": 94, "span": [0, 2], "__ref_s3_data": null}], "text": "78", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.6739559173584, 334.4214172363281, 37.38203048706055], "page": 94, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/85"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 95, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 5.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 95, "span": [0, 1], "__ref_s3_data": null}], "text": "5", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 513.0821533203125, 511.1795959472656, 538.5230712890625], "page": 95, "span": [0, 27], "__ref_s3_data": null}], "text": "RCAC and non-SQL interfaces", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.72430419921875, 405.0442199707031, 547.181884765625, 475.4884948730469], "page": 95, "span": [0, 513], "__ref_s3_data": null}], "text": "A benefit of Row and Column Access Control (RCAC) is that its security controls are enforced across all the interfaces that access DB2 for i because 
the security rules are defined and enforced at the database level. The examples that are shown in this paper focus on SQL-based access, but row permissions and column masks also are enforced for non-SQL interfaces, such as native record-level access in RPG and COBOL programs and CL commands, such as Display Physical File Member ( DSPPFM ) and Copy File ( CPYF ).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.84396362304688, 359.08001708984375, 547.2554931640625, 393.4468994140625], "page": 95, "span": [0, 246], "__ref_s3_data": null}], "text": "This consistent enforcement across all interfaces is a good thing, but there are some nuances and restrictions as a result of applying an SQL-based technology such as RCAC to non-SQL interfaces. These considerations are described in this chapter.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.23419189453125, 337.0604553222656, 412.4370422363281, 347.3124084472656], "page": 95, "span": [0, 65], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.71270751953125, 319.21295166015625, 254.5669708251953, 329.8729553222656], "page": 95, "span": [0, 38], "__ref_s3_data": null}], "text": "GLYPH Unsupported interfaces", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6899871826172, 308.0808410644531, 285.9798278808594, 317.739501953125], "page": 95, "span": [0, 47], "__ref_s3_data": null}], "text": "GLYPH Native query result differences", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.55894470214844, 295.8805847167969, 325.0879211425781, 305.8946838378906], "page": 95, "span": [0, 53], "__ref_s3_data": null}], "text": "GLYPH Accidental updates with masked values", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6660919189453, 283.8162536621094, 
318.9596252441406, 293.9013366699219], "page": 95, "span": [0, 49], "__ref_s3_data": null}], "text": "GLYPH System CL commands considerations", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.00144958496094, 27.78173828125, 257.24334716796875, 37.335731506347656], "page": 95, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.47705078125, 27.93828010559082, 547.2591552734375, 37.86309051513672], "page": 95, "span": [0, 2], "__ref_s3_data": null}], "text": "79", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.36381530761719, 702.2925415039062, 275.70184326171875, 718.2304077148438], "page": 96, "span": [0, 26], "__ref_s3_data": null}], "text": "5.1 Unsupported interfaces", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0512237548828, 663.3173828125, 519.7969970703125, 685.9818725585938], "page": 96, "span": [0, 112], "__ref_s3_data": null}], "text": "It is not possible to create a row permission or column mask on a distributed table or a program-described file.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9476776123047, 617.174072265625, 547.2138061523438, 651.9033813476562], "page": 96, "span": [0, 265], "__ref_s3_data": null}], "text": "After a row permission or column mask is added to a table, there are some data access requests that no longer work. 
An attempt to open or query a table with activated RCAC controls involving any of the following scenarios is rejected with the CPD43A4 error message:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5137176513672, 600.9810180664062, 526.0348510742188, 611.3893432617188], "page": 96, "span": [0, 103], "__ref_s3_data": null}], "text": "GLYPH A logical file with multiple formats if the open attempt requests more than one format.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56863403320312, 589.0631103515625, 410.4948425292969, 598.9420776367188], "page": 96, "span": [0, 75], "__ref_s3_data": null}], "text": "GLYPH A table or query that specifies an ICU 2.6.1 sort sequence.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.54893493652344, 576.8346557617188, 264.3358459472656, 587.2266845703125], "page": 96, "span": [0, 43], "__ref_s3_data": null}], "text": "GLYPH A table with read triggers.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.76893615722656, 543.160400390625, 537.0208129882812, 565.207763671875], "page": 96, "span": [0, 136], "__ref_s3_data": null}], "text": "This unsupported interface error occurs when a table with RCAC controls is accessed, not when the RCAC control is created and activated.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.05755615234375, 473.14154052734375, 547.275634765625, 531.4187622070312], "page": 96, "span": [0, 451], "__ref_s3_data": null}], "text": "For example, assume that there is a physical file, PF1, which is referenced by a single format logical file (LFS) and a multi-format logical file (LFM). A row permission is successfully created and activated for PF1. Any application that accesses PF1 directly or LFS continues to work without any issues. 
However, any application that opens LFM with multiple formats receives an error on the open attempt after the row permission is activated for PF1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.07794189453125, 408.80841064453125, 541.2369995117188, 455.4422912597656], "page": 96, "span": [0, 306], "__ref_s3_data": null}], "text": "Important: This potential runtime error places a heavy emphasis on a comprehensive testing plan to ensure that all programs are tested. If testing uncovers an unsupported interface, then you must investigate whether the application can be rewritten to use a data access interface that is supported by RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.42249298095703, 360.3178405761719, 329.61151123046875, 376.4193420410156], "page": 96, "span": [0, 35], "__ref_s3_data": null}], "text": "5.2 Native query result differences", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.82437133789062, 297.52056884765625, 542.3941650390625, 344.1916198730469], "page": 96, "span": [0, 321], "__ref_s3_data": null}], "text": "The SQL Query Engine (SQE) is the only engine that is enhanced by IBM to enforce RCAC controls on query requests. In order for native query requests to work with RCAC, these native query requests are now processed by SQE instead of the Classic Query Engine (CQE). 
Native query requests can consist of the following items:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.66761779785156, 281.1392517089844, 198.2183074951172, 291.8307189941406], "page": 96, "span": [0, 25], "__ref_s3_data": null}], "text": "GLYPH Query/400", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.72911071777344, 269.1394348144531, 214.61248779296875, 280.1279602050781], "page": 96, "span": [0, 26], "__ref_s3_data": null}], "text": "GLYPH QQQQRY API", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.67552185058594, 257.1396179199219, 315.83990478515625, 267.3670959472656], "page": 96, "span": [0, 51], "__ref_s3_data": null}], "text": "GLYPH Open Query File ( OPNQRYF ) command", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4887237548828, 244.85897827148438, 285.8927307128906, 255.41610717773438], "page": 96, "span": [0, 44], "__ref_s3_data": null}], "text": "GLYPH Run Query ( RUNQRY ) command", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4298553466797, 232.98248291015625, 441.65673828125, 243.50750732421875], "page": 96, "span": [0, 74], "__ref_s3_data": null}], "text": "GLYPH Native open (RPG, COBOL, OPNDBF, and so on) of an SQL view", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.82940673828125, 175.08465576171875, 541.6351928710938, 221.27053833007812], "page": 96, "span": [0, 332], "__ref_s3_data": null}], "text": "Legacy queries that have been running without any issues for many years and over many IBM i releases are now processed by a different query engine. As a result, the runtime behavior and results that are returned can be different for native query requests with RCAC enabled. 
The OPNQRYF command and Query/400 run with SQE by default.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6905975341797, 140.7740020751953, 547.283447265625, 163.3935089111328], "page": 96, "span": [0, 129], "__ref_s3_data": null}], "text": "The following list documents some of the query output differences that can occur when native query requests are processed by CQE:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.69326782226562, 123.73572540283203, 299.5278015136719, 134.05801391601562], "page": 96, "span": [0, 52], "__ref_s3_data": null}], "text": "GLYPH Different ordering in the result set", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6090850830078, 111.89027404785156, 393.4990234375, 122.04586791992188], "page": 96, "span": [0, 72], "__ref_s3_data": null}], "text": "GLYPH Different values for null columns or columns with errors", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.68191528320312, 99.16911315917969, 358.4886169433594, 109.94051361083984], "page": 96, "span": [0, 58], "__ref_s3_data": null}], "text": "GLYPH Suppression of some mapping error messages", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.53482055664062, 87.86771392822266, 310.5740661621094, 98.40489196777344], "page": 96, "span": [0, 52], "__ref_s3_data": null}], "text": "GLYPH Loss of RRN positioning capabilities", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56546020507812, 75.50570678710938, 354.23272705078125, 86.25474548339844], "page": 96, "span": [0, 61], "__ref_s3_data": null}], "text": "GLYPH Duplicate key processing behavior differences", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5389404296875, 63.51583480834961, 246.47032165527344, 73.92317962646484], "page": 96, "span": [0, 36], "__ref_s3_data": null}], "text": "GLYPH Missing key feedback", 
"type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.39680480957031, 27.93828010559082, 78.4020004272461, 37.634681701660156], "page": 96, "span": [0, 2], "__ref_s3_data": null}], "text": "80", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.37086486816406, 27.725845336914062, 334.4214172363281, 37.295021057128906], "page": 96, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.34434509277344, 699.2781372070312, 543.0580444335938, 721.3868408203125], "page": 97, "span": [0, 105], "__ref_s3_data": null}], "text": "For a list of the differences and additional details, see the IBM i Memo to Users Version 7.2 , found at:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.02487182617188, 681.9579467773438, 521.9223022460938, 692.5218505859375], "page": 97, "span": [0, 77], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.06448364257812, 648.2789306640625, 544.6605834960938, 670.6199951171875], "page": 97, "span": [0, 180], "__ref_s3_data": null}], "text": "In addition, the performance of a native query with SQE can be different. 
It is possible that a new index or keyed logical file might need to be created to improve the performance.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.0964813232422, 595.4119873046875, 495.4548034667969, 630.0718994140625], "page": 97, "span": [0, 229], "__ref_s3_data": null}], "text": "Important: Based on the potential impacts of query result set and performance differences, you should perform extensive functional testing and performance benchmarking of applications and reports that use native query interfaces.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.58708953857422, 547.8720092773438, 396.822265625, 563.6151123046875], "page": 97, "span": [0, 41], "__ref_s3_data": null}], "text": "5.3 Accidental updates with masked values", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9681854248047, 497.2587890625, 547.184814453125, 531.249755859375], "page": 97, "span": [0, 193], "__ref_s3_data": null}], "text": "The masked values that are returned by a column mask can potentially cause the original data value to be accidentally overwritten, especially with applications using native record-level access.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0658416748047, 439.1994934082031, 541.6968994140625, 485.2427062988281], "page": 97, "span": [0, 291], "__ref_s3_data": null}], "text": "For example, consider a table containing three columns of first name, last name, and tax ID that is read by an RPG program. 
The user running the program is not authorized to see the tax ID value, so a masked value (*****3333) is written into the program's record buffer, as shown Figure 5-1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.98301696777344, 369.28070068359375, 547.1439819335938, 427.1898498535156], "page": 97, "span": [0, 453], "__ref_s3_data": null}], "text": "In this example, the application reads the data for an update to correct the misspelling of the last name. The last name value is changed to Smith in the buffer. Now, a WRITE request is issued by the program, which uses the contents of the record buffer to update the row in the underlying DB2 table. Unfortunately, the record buffer still contains a masked value for the tax ID, so the tax ID value in the table is accidentally set to the masked value.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.35667419433594, 60.42193603515625, 374.1333312988281, 69.67607116699219], "page": 97, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 5-1 Accidental update with masked values scenario", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/86"}, {"prov": [{"bbox": [353.7305603027344, 27.851152420043945, 523.6332397460938, 37.11669158935547], "page": 97, "span": [0, 38], "__ref_s3_data": null}], "text": "Chapter 5. 
RCAC and non-SQL interfaces", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.676513671875, 27.93828010559082, 547.2591552734375, 37.49736785888672], "page": 97, "span": [0, 2], "__ref_s3_data": null}], "text": "81", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.46670532226562, 27.93828010559082, 78.4020004272461, 37.6083984375], "page": 98, "span": [0, 2], "__ref_s3_data": null}], "text": "82", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.70258331298828, 334.4214172363281, 37.288448333740234], "page": 98, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.01290893554688, 699.2785034179688, 545.1429443359375, 721.2443237304688], "page": 98, "span": [0, 107], "__ref_s3_data": null}], "text": "Obviously, careful planning and testing should be exercised to avoid accidental updates with masked values.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.98336791992188, 641.1885986328125, 547.2675170898438, 687.29736328125], "page": 98, "span": [0, 333], "__ref_s3_data": null}], "text": "DB2 for i also enhanced its check constraint support in the IBM i 7.2 release with a new ON UPDATE clause that allows the existing value to be preserved when a masked value is detected by a check constraint. 
Details about how to employ this new check constraint support can be found in 6.8.1, \"Check constraint solution\" on page 108.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.53089904785156, 597.7332763671875, 385.5848388671875, 613.8056030273438], "page": 98, "span": [0, 37], "__ref_s3_data": null}], "text": "5.4 System CL commands considerations", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.91700744628906, 522.7411499023438, 547.4896240234375, 581.418212890625], "page": 98, "span": [0, 440], "__ref_s3_data": null}], "text": "As stated earlier, RCAC controls are enforced on all data access interfaces. This enforcement is not limited to programmatic interfaces; it also includes system CL commands that read and insert data, such as the Create Duplicate Object ( CRTDUPOBJ ) and Start DFU ( STRDFU ) CL commands. This section documents the behavior of the Create Duplicate Object ( CRTDUPOBJ ), Copy File ( CPYF ), and Copy Library ( CPYLIB ) CL commands with RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.38429260253906, 490.38629150390625, 405.0467224121094, 503.6981506347656], "page": 98, "span": [0, 49], "__ref_s3_data": null}], "text": "5.4.1 Create Duplicate Object (CRTDUPOBJ) command", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.77301025390625, 431.25885009765625, 542.9708251953125, 477.3899230957031], "page": 98, "span": [0, 294], "__ref_s3_data": null}], "text": "The CRTDUPOBJ command is enhanced with a new Access Control ( ACCCTL ) parameter in the IBM i 7.2 release to copy RCAC controls to the new object being created. 
Row permissions and column masks are copied to the new object by default because the default value for the ACCCTL parameter is *ALL .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.04022216796875, 385.2994079589844, 538.5584716796875, 419.4244689941406], "page": 98, "span": [0, 210], "__ref_s3_data": null}], "text": "If the invoker of the CRTDUPOBJ command asks for data to be copied with a value of *YES for the DATA parameter, the value of the ACCCTL parameter must be *ALL . If not, the command invocation receives an error.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.79270935058594, 315.28057861328125, 547.2168579101562, 373.29388427734375], "page": 98, "span": [0, 442], "__ref_s3_data": null}], "text": "When data is copied to the duplicated object with the DATA parameter, all rows and unmasked column values are copied into the new object, even if the command invoker is not authorized to view all rows or certain column values. This behavior occurs because the RCAC controls also are copied to the new object. The copied RCAC controls enforce that only authorized users are allowed to view row and column values in the newly duplicated object.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.39613342285156, 282.42333984375, 270.95599365234375, 295.696533203125], "page": 98, "span": [0, 30], "__ref_s3_data": null}], "text": "5.4.2 Copy File (CPYF) command", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.81484985351562, 234.62559509277344, 547.2855224609375, 269.47161865234375], "page": 98, "span": [0, 214], "__ref_s3_data": null}], "text": "The CPYF command copies only data, so there is no new parameter to copy RCAC controls to the target table. 
Therefore, if CPYF is used to create a target table, there are no RCAC controls placed on the target table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9669189453125, 129.28024291992188, 547.3273315429688, 223.54742431640625], "page": 98, "span": [0, 660], "__ref_s3_data": null}], "text": "When RCAC controls are in place on the source table, the CPYF command is limited to reading rows and column values that are based on the invoker of the CPYF command. If a user is authorized to see all rows and column values, then all rows and unmasked column values are copied to the target table (assuming no RCAC controls are on the target table). If a user without full access runs the CPYF command, the CPYF command can copy only a subset of the rows into the target table. In addition, if that user can view only masked column values, then masked values are copied into the target table. This also applies to the Copy to Import File ( CPYTOIMPF ) command.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0917205810547, 71.02362060546875, 535.9159545898438, 117.43394470214844], "page": 98, "span": [0, 330], "__ref_s3_data": null}], "text": "If the target table has RCAC controls defined and activated, then the CPYF command is allowed only to add or replace rows in the target table based on the RCAC controls. 
If CPYF tries to add a row to the target table that the command invoker is not allowed to view according to the target RCAC controls, then an error is received.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.36076354980469, 708.317626953125, 305.67718505859375, 721.67529296875], "page": 99, "span": [0, 35], "__ref_s3_data": null}], "text": "5.4.3 Copy Library (CPYLIB) command", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.80535888671875, 637.099365234375, 544.9737548828125, 695.4727783203125], "page": 99, "span": [0, 354], "__ref_s3_data": null}], "text": "The CPYLIB command is enhanced with the same Access Control ( ACCCTL ) parameter as the CRTDUPOBJ command in the IBM i 7.2 release (see 5.4.1, \"Create Duplicate Object (CRTDUPOBJ) command\" on page 82). Row permissions and column masks are copied to the new object in the new library by default because the default value for the ACCCTL parameter is *ALL .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [353.8760070800781, 27.865577697753906, 523.6332397460938, 37.186405181884766], "page": 99, "span": [0, 38], "__ref_s3_data": null}], "text": "Chapter 5. 
RCAC and non-SQL interfaces", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7760009765625, 27.93828010559082, 547.2591552734375, 37.517459869384766], "page": 99, "span": [0, 2], "__ref_s3_data": null}], "text": "83", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.56328582763672, 27.93828010559082, 78.4020004272461, 37.691104888916016], "page": 100, "span": [0, 2], "__ref_s3_data": null}], "text": "84", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.69057273864746, 334.5158386230469, 37.364131927490234], "page": 100, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/87"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 101, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 6.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1068572998047, 513.0821533203125, 455.59796142578125, 538.5267944335938], "page": 101, "span": [0, 25], "__ref_s3_data": null}], "text": "Additional considerations", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.85086059570312, 452.9068603515625, 531.345458984375, 475.37335205078125], "page": 101, "span": [0, 168], "__ref_s3_data": null}], "text": "This chapter covers additional considerations that must be taken into account when implementing Row and Column Access Control (RCAC), including the following functions:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.82879638671875, 435.5367736816406, 267.31884765625, 445.6518859863281], "page": 101, "span": [0, 40], "__ref_s3_data": null}], "text": "GLYPH Timing of column masking", "type": "paragraph", "name": "List-item", "font": null}, 
{"prov": [{"bbox": [135.65838623046875, 424.05908203125, 221.50328063964844, 433.73193359375], "page": 101, "span": [0, 29], "__ref_s3_data": null}], "text": "GLYPH Data movement", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.8079376220703, 412.05926513671875, 174.7480926513672, 421.72412109375], "page": 101, "span": [0, 21], "__ref_s3_data": null}], "text": "GLYPH Joins", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.80653381347656, 400.0594482421875, 177.69752502441406, 409.92095947265625], "page": 101, "span": [0, 21], "__ref_s3_data": null}], "text": "GLYPH Views", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.71363830566406, 387.4857177734375, 262.4305114746094, 398.0342102050781], "page": 101, "span": [0, 41], "__ref_s3_data": null}], "text": "GLYPH Materialized query tables", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.69224548339844, 376.059814453125, 210.2908477783203, 386.0014953613281], "page": 101, "span": [0, 29], "__ref_s3_data": null}], "text": "GLYPH Index advisor", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.62872314453125, 363.18524169921875, 310.97344970703125, 373.8130798339844], "page": 101, "span": [0, 51], "__ref_s3_data": null}], "text": "GLYPH Monitoring, analysis, and debugging", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.52769470214844, 351.5644226074219, 273.3426513671875, 362.0223693847656], "page": 101, "span": [0, 43], "__ref_s3_data": null}], "text": "GLYPH Performance and scalability", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.10952758789062, 329.46014404296875, 347.4121398925781, 340.1173400878906], "page": 101, "span": [0, 49], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": 
[{"bbox": [135.86045837402344, 312.50946044921875, 267.31884765625, 323.55072021484375], "page": 101, "span": [0, 40], "__ref_s3_data": null}], "text": "GLYPH Timing of column masking", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.68113708496094, 301.06097412109375, 296.47052001953125, 311.4767150878906], "page": 101, "span": [0, 45], "__ref_s3_data": null}], "text": "GLYPH RCAC effects on data movement", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7341766357422, 289.0611572265625, 248.6178741455078, 299.3589172363281], "page": 101, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH RCAC effects on joins", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7424774169922, 276.7906188964844, 368.6199951171875, 287.5904235839844], "page": 101, "span": [0, 62], "__ref_s3_data": null}], "text": "GLYPH Monitoring, analyzing, and debugging with RCAC", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.42808532714844, 264.9301452636719, 428.5085754394531, 275.88232421875], "page": 101, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH Views, materialized query tables, and query rewrite with RCAC", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.52320861816406, 252.71844482421875, 349.4490051269531, 263.50238037109375], "page": 101, "span": [0, 59], "__ref_s3_data": null}], "text": "GLYPH RCAC effects on performance and scalability", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.65530395507812, 240.60609436035156, 390.4403381347656, 251.41629028320312], "page": 101, "span": [0, 70], "__ref_s3_data": null}], "text": "GLYPH Exclusive lock to implement RCAC (availability issues)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.69041442871094, 228.79293823242188, 315.4986267089844, 239.35397338867188], "page": 101, "span": [0, 51], 
"__ref_s3_data": null}], "text": "GLYPH Avoiding propagation of masked data", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.63272094726562, 217.0623016357422, 307.3042297363281, 227.76193237304688], "page": 101, "span": [0, 48], "__ref_s3_data": null}], "text": "GLYPH Triggers and functions (SECURED)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.63467407226562, 204.85337829589844, 315.1477355957031, 215.71888732910156], "page": 101, "span": [0, 53], "__ref_s3_data": null}], "text": "GLYPH RCAC is only one part of the solution", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.9858283996582, 27.747447967529297, 257.24334716796875, 37.346683502197266], "page": 101, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7403564453125, 27.93828010559082, 547.2591552734375, 37.66413879394531], "page": 101, "span": [0, 2], "__ref_s3_data": null}], "text": "85", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 101, "span": [0, 1], "__ref_s3_data": null}], "text": "6", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.65674591064453, 702.2841796875, 298.45440673828125, 718.27685546875], "page": 102, "span": [0, 28], "__ref_s3_data": null}], "text": "6.1 Timing of column masking", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.86968994140625, 627.637451171875, 547.2496337890625, 685.9827270507812], "page": 102, "span": [0, 381], "__ref_s3_data": null}], "text": "An important design and implementation consideration is the fact that RCAC column masking occurs after all of the query processing is complete, which means that the query results are not at all based 
on the masked values. Any local selection, joining, grouping, or ordering operations are based on the unmasked column values. Only the final result set is the target of the masking.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9619598388672, 593.6736450195312, 547.2325439453125, 615.8494873046875], "page": 102, "span": [0, 140], "__ref_s3_data": null}], "text": "An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [222.84666442871094, 559.6641845703125, 250.23239135742188, 568.2152099609375], "page": 102, "span": [0, 6], "__ref_s3_data": null}], "text": "SELECT", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [222.54440307617188, 515.1958618164062, 263.9931640625, 545.9962768554688], "page": 102, "span": [0, 22], "__ref_s3_data": null}], "text": "FROM GROUP BY ORDER BY", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [160.45700073242188, 481.6206970214844, 285.03265380859375, 494.0350036621094], "page": 102, "span": [0, 20], "__ref_s3_data": null}], "text": "Without RCAC Masking", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [341.0262451171875, 481.6206970214844, 447.7765808105469, 493.9162292480469], "page": 102, "span": [0, 17], "__ref_s3_data": null}], "text": "With RCAC Masking", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/14"}, {"prov": [{"bbox": [136.15956115722656, 310.1897888183594, 289.9644775390625, 319.9989013671875], "page": 102, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-1 Timing of column masking", "type": "caption", "name": "Caption", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/15"}, {"prov": [{"bbox": [289.7908630371094, 537.4301147460938, 
389.4005432128906, 568.0421142578125], "page": 102, "span": [0, 53], "__ref_s3_data": null}], "text": "CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [290.2843017578125, 526.31298828125, 383.71990966796875, 534.2352905273438], "page": 102, "span": [0, 18], "__ref_s3_data": null}], "text": "CREDIT_CARD_NUMBER", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [289.9924621582031, 514.7672729492188, 386.2455749511719, 524.0040283203125], "page": 102, "span": [0, 19], "__ref_s3_data": null}], "text": "CREDIT_CARD_NUMBER;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.49551391601562, 27.93828010559082, 78.4020004272461, 37.467079162597656], "page": 102, "span": [0, 2], "__ref_s3_data": null}], "text": "86", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.642459869384766, 334.4214172363281, 37.34343719482422], "page": 102, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.79122924804688, 626.829345703125, 547.1474609375, 721.4331665039062], "page": 103, "span": [0, 652], "__ref_s3_data": null}], "text": "Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. 
This difference in behavior is shown in Figure 6-2.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.33775329589844, 563.2588500976562, 540.7468872070312, 609.4223022460938], "page": 103, "span": [0, 262], "__ref_s3_data": null}], "text": "Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1103515625, 250.29544067382812, 386.60394287109375, 260.0503845214844], "page": 103, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 6-2 Masking differences between Fieldproc and RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/88"}, {"prov": [{"bbox": [376.2842712402344, 27.846771240234375, 523.6287841796875, 37.037837982177734], "page": 103, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7338256835938, 27.93828010559082, 547.2591552734375, 37.56776428222656], "page": 103, "span": [0, 2], "__ref_s3_data": null}], "text": "87", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.67425537109375, 706.0162963867188, 342.9832458496094, 721.5018310546875], "page": 104, "span": [0, 33], "__ref_s3_data": null}], "text": "6.2 RCAC effects on data movement", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.64599609375, 631.2990112304688, 547.2276000976562, 689.4252319335938], "page": 104, "span": [0, 370], "__ref_s3_data": null}], "text": "As described earlier and shown in Figure 6-3, RCAC is applied pervasively regardless of the data access programming interface, SQL statement, or IBM i command. 
The effects of RCAC on data movement scenarios can be profound and possibly problematic. It is important to understand these effects and make the appropriate adjustments to avoid incorrect results or data loss.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.05140686035156, 491.0803527832031, 292.9797668457031, 500.5362854003906], "page": 104, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-3 RCAC and data movement", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/89"}, {"prov": [{"bbox": [135.8123321533203, 392.3400573730469, 547.2745361328125, 474.5461730957031], "page": 104, "span": [0, 619], "__ref_s3_data": null}], "text": "The \"user\" that is running the data movement application or process, whether it be a high availability (HA) scenario, an extract, transform, load (ETL) scenario, or just copying data from one file or table to another one, must have permission to all the source rows without masking, and not be restricted from putting rows into the target. Allowing the data movement application or process to bypass the RCAC rules must be based on a clear and concise understanding of the organization's object security and data access policy. Proper design, implementation, and testing are critical success factors when applying RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [141.9169158935547, 316.03607177734375, 536.527587890625, 374.77313232421875], "page": 104, "span": [0, 360], "__ref_s3_data": null}], "text": "Important: RCAC is applied to the table or physical file access. It is not applied to the journal receiver access. Any and all database transactions are represented in the journal regardless of RCAC row permissions and column masks. 
This makes it essential that IBM i security is used to ensure that only authorized personnel have access to the journaled data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.920166015625, 287.2419738769531, 390.6604919433594, 297.3553771972656], "page": 104, "span": [0, 59], "__ref_s3_data": null}], "text": "This section covers in detail the following three examples:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.67665100097656, 270.45928955078125, 372.0890197753906, 280.7431335449219], "page": 104, "span": [0, 64], "__ref_s3_data": null}], "text": "GLYPH Effects when RCAC is defined on the source table", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6602020263672, 258.45947265625, 367.72723388671875, 268.80596923828125], "page": 104, "span": [0, 64], "__ref_s3_data": null}], "text": "GLYPH Effects when RCAC is defined on the target table", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.55271911621094, 246.36570739746094, 430.467529296875, 256.64398193359375], "page": 104, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH Effects when RCAC is defined on both source and target tables", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.39418029785156, 213.8573760986328, 407.9704895019531, 226.74691772460938], "page": 104, "span": [0, 54], "__ref_s3_data": null}], "text": "6.2.1 Effects when RCAC is defined on the source table", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.16326904296875, 178.47857666015625, 536.1681518554688, 200.64059448242188], "page": 104, "span": [0, 102], "__ref_s3_data": null}], "text": "Example 6-1 shows a simple example that illustrates the effect of RCAC as defined on the source table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.6311798095703, 157.3170928955078, 331.9786682128906, 
166.7194366455078], "page": 104, "span": [0, 40], "__ref_s3_data": null}], "text": "Example 6-1 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.79693603515625, 139.6705780029297, 346.6770935058594, 149.1321258544922], "page": 104, "span": [0, 42], "__ref_s3_data": null}], "text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.45632934570312, 27.93828010559082, 78.4020004272461, 37.49223327636719], "page": 104, "span": [0, 2], "__ref_s3_data": null}], "text": "88", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.3826904296875, 27.736873626708984, 334.4214172363281, 37.29977798461914], "page": 104, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.0976104736328, 650.8802490234375, 547.2900390625, 721.2063598632812], "page": 105, "span": [0, 516], "__ref_s3_data": null}], "text": "For example, given a \"source\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the RCAC rules applied. This reduced and modified result set is inserted into the \"target\" table even though the query is defined as returning all rows and all columns. 
Instead of seven rows that are selected from the source, only three rows are returned and placed into the target, as shown in Figure 6-4.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.33627319335938, 375.21319580078125, 377.8245544433594, 384.5627136230469], "page": 105, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 6-4 RCAC effects on data movement from SOURCE", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/90"}, {"prov": [{"bbox": [64.3865737915039, 343.17596435546875, 401.6576843261719, 356.1142578125], "page": 105, "span": [0, 54], "__ref_s3_data": null}], "text": "6.2.2 Effects when RCAC is defined on the target table", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.10086059570312, 307.6630859375, 536.1681518554688, 329.97418212890625], "page": 105, "span": [0, 102], "__ref_s3_data": null}], "text": "Example 6-2 shows a simple example that illustrates the effect of RCAC as defined on the target table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 286.7970886230469, 331.8053894042969, 295.9055480957031], "page": 105, "span": [0, 40], "__ref_s3_data": null}], "text": "Example 6-2 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 268.98992919921875, 346.6770935058594, 278.5483703613281], "page": 105, "span": [0, 42], "__ref_s3_data": null}], "text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [376.2767333984375, 27.871395111083984, 523.6287841796875, 37.01155090332031], "page": 105, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.7984619140625, 27.93828010559082, 547.2591552734375, 37.58755111694336], "page": 105, "span": [0, 2], "__ref_s3_data": null}], "text": "89", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.30988311767578, 27.93828010559082, 78.4020004272461, 37.60688781738281], "page": 106, "span": [0, 2], "__ref_s3_data": null}], "text": "90", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.724334716796875, 334.4214172363281, 37.2808952331543], "page": 106, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.002685546875, 651.1795654296875, 547.2645874023438, 721.380859375], "page": 106, "span": [0, 546], "__ref_s3_data": null}], "text": "Given a \"target\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that represents all the rows and columns. The seven row result set is inserted into the \"target\", and the RCAC row permission causes an error to be returned, as shown in Figure 6-5. The source rows where NAME = 'CAIN' do not satisfy the target table's permission, and therefore cannot be inserted. 
In other words, you are inserting data that you cannot read.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1909637451172, 368.4858703613281, 367.20880126953125, 377.8680419921875], "page": 106, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-5 RCAC effects on data movement on TARGET", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/91"}, {"prov": [{"bbox": [64.30170440673828, 336.48931884765625, 490.12786865234375, 349.3372497558594], "page": 106, "span": [0, 67], "__ref_s3_data": null}], "text": "6.2.3 Effects when RCAC is defined on both source and target tables", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.27003479003906, 301.178466796875, 541.6332397460938, 323.1507568359375], "page": 106, "span": [0, 123], "__ref_s3_data": null}], "text": "Example 6-3 shows a simple example that illustrates the effect of RCAC as defined on both the source and the target tables.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.31845092773438, 280.07708740234375, 332.0157775878906, 289.33172607421875], "page": 106, "span": [0, 40], "__ref_s3_data": null}], "text": "Example 6-3 INSERT INTO TARGET statement", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.64996337890625, 262.2247314453125, 346.6770935058594, 271.6866760253906], "page": 106, "span": [0, 42], "__ref_s3_data": null}], "text": "INSERT INTO TARGET (SELECT * FROM SOURCE);", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.06167602539062, 173.19976806640625, 547.2467041015625, 243.45176696777344], "page": 106, "span": [0, 473], "__ref_s3_data": null}], "text": "Given a \"source\" table and a \"target\" table with a row permission defined as NAME <> 'CAIN' and a column mask that is defined to project the value 999.99 for AMOUNT, the SELECT statement produces a result set that has the 
RCAC rules applied. This reduced and modified result set is inserted into the \"target\" table even though the query is defined as returning all rows and all columns. Instead of seven rows that are selected from the source, only three rows are returned.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.64163208007812, 674.9846801757812, 547.2501831054688, 721.2555541992188], "page": 107, "span": [0, 331], "__ref_s3_data": null}], "text": "Although the source rows where NAME <> 'CAIN' do satisfy the target table's permission, the AMOUNT column value of 999.99 represents masked data and therefore cannot be inserted. An error is returned indicating the failure, as shown in Figure 6-6. In this scenario, DB2 is protecting against an overt attempt to insert masked data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.189453125, 395.10968017578125, 425.718017578125, 404.72088623046875], "page": 107, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 6-6 RCAC effects on data movement on SOURCE and TARGET", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/92"}, {"prov": [{"bbox": [64.65412139892578, 351.7353515625, 263.02801513671875, 367.916748046875], "page": 107, "span": [0, 25], "__ref_s3_data": null}], "text": "6.3 RCAC effects on joins", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.89974975585938, 301.1240539550781, 546.7406005859375, 335.3844909667969], "page": 107, "span": [0, 258], "__ref_s3_data": null}], "text": "As mentioned previously, a fundamental concept of row permission is that it defines a logical subset of rows that a user or group of users is permitted to access and use. 
This subset becomes the new basis of any query against the table that has RCAC enabled.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.38418579101562, 260.81829833984375, 541.3812255859375, 283.3266296386719], "page": 107, "span": [0, 167], "__ref_s3_data": null}], "text": "Note: Thinking of the row permission as defining a virtual set of rows that can be operated on is the secret to understanding the effect of RCAC on any join operation.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [376.33331298828125, 27.853492736816406, 523.6287841796875, 37.027339935302734], "page": 107, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.72705078125, 27.93828010559082, 547.2591552734375, 37.5089225769043], "page": 107, "span": [0, 2], "__ref_s3_data": null}], "text": "91", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.33194732666016, 27.93828010559082, 78.4020004272461, 37.64910888671875], "page": 108, "span": [0, 2], "__ref_s3_data": null}], "text": "92", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.734079360961914, 334.4214172363281, 37.32621383666992], "page": 108, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.87557983398438, 699.1170654296875, 537.429931640625, 721.29052734375], "page": 108, "span": [0, 149], "__ref_s3_data": null}], "text": "As shown in Figure 6-7, there are two different sets, set A and set B. 
However, set B has a row permission that subsets the rows that a user can see.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1213836669922, 463.05242919921875, 334.17889404296875, 472.4340515136719], "page": 108, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-7 Set A and set B with row permissions", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/93"}, {"prov": [{"bbox": [64.3143310546875, 430.5066833496094, 166.59303283691406, 443.1670837402344], "page": 108, "span": [0, 17], "__ref_s3_data": null}], "text": "6.3.1 Inner joins", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0799102783203, 395.0136413574219, 547.21875, 417.2611999511719], "page": 108, "span": [0, 158], "__ref_s3_data": null}], "text": "Inner join defines the intersection of two data sets. For a row to be returned from the inner join query, it must appear in both sets, as shown in Figure 6-8.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0496826171875, 157.818115234375, 327.55682373046875, 167.18350219726562], "page": 108, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-8 Inner join without RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/94"}, {"prov": [{"bbox": [136.28961181640625, 675.0590209960938, 547.3219604492188, 721.3756103515625], "page": 109, "span": [0, 279], "__ref_s3_data": null}], "text": "Given that row permission serves to eliminate logically rows from one or more sets, the result set from an inner join (and a subquery) can be different when RCAC is applied. 
RCAC can reduce the number of rows that are permitted to be accessed by the join, as shown in Figure 6-9.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [141.91893005371094, 622.695068359375, 537.6323852539062, 657.2465209960938], "page": 109, "span": [0, 207], "__ref_s3_data": null}], "text": "Effect of column masks on inner joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.10440063476562, 359.0824890136719, 314.9508972167969, 368.5648498535156], "page": 109, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-9 Inner join with RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/95"}, {"prov": [{"bbox": [376.2262878417969, 27.840274810791016, 523.6287841796875, 37.05694580078125], "page": 109, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6971435546875, 27.93828010559082, 547.2591552734375, 37.44646072387695], "page": 109, "span": [0, 2], "__ref_s3_data": null}], "text": "93", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.3655014038086, 708.4302978515625, 169.4800567626953, 721.3550415039062], "page": 110, "span": [0, 17], "__ref_s3_data": null}], "text": "6.3.2 Outer joins", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.71127319335938, 649.0245971679688, 547.2286376953125, 695.3336791992188], "page": 110, "span": [0, 360], "__ref_s3_data": null}], "text": "Outer joins preserve one or both sides of two data sets. 
A row can be returned from the outer join query if it appears in the primary set (LEFT, RIGHT, or both in the case of FULL), as shown in Figure 6-10. Column values from the secondary set are returned if the row has a match in the primary set. Otherwise, NULL is returned for the column value by default.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.24244689941406, 407.2479553222656, 334.27374267578125, 416.5892333984375], "page": 110, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 6-10 Outer join without RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/96"}, {"prov": [{"bbox": [64.35562133789062, 27.93828010559082, 78.4020004272461, 37.65726852416992], "page": 110, "span": [0, 2], "__ref_s3_data": null}], "text": "94", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.70145034790039, 334.4722595214844, 37.30113983154297], "page": 110, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.098388671875, 686.85498046875, 535.2982177734375, 721.3606567382812], "page": 111, "span": [0, 218], "__ref_s3_data": null}], "text": "Given that row permission serves to eliminate logically rows from one or more sets, more column values that are returned from the secondary table in outer join can be NULL when RCAC is applied, as shown in Figure 6-11.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [141.93408203125, 634.7100219726562, 537.6323852539062, 669.4381713867188], "page": 111, "span": [0, 207], "__ref_s3_data": null}], "text": "Effect of column masks on inner joins: Because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", "type": 
"paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.07041931152344, 357.96063232421875, 321.8915710449219, 367.32086181640625], "page": 111, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 6-11 Outer join with RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/97"}, {"prov": [{"bbox": [376.27435302734375, 27.857723236083984, 523.6287841796875, 37.033607482910156], "page": 111, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.671142578125, 27.93828010559082, 547.2591552734375, 37.526145935058594], "page": 111, "span": [0, 2], "__ref_s3_data": null}], "text": "95", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.3558349609375, 708.388671875, 196.83258056640625, 721.2674560546875], "page": 112, "span": [0, 21], "__ref_s3_data": null}], "text": "6.3.3 Exception joins", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.830810546875, 648.7778930664062, 547.2914428710938, 695.349609375], "page": 112, "span": [0, 297], "__ref_s3_data": null}], "text": "Exception joins preserve one side of two data sets. A row can be returned from the exception join query if it appears in the primary set (LEFT or RIGHT) and the row does not appear in the secondary set, as shown in Figure 6-12. 
Column values from the secondary set are returned as NULL by default.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1206512451172, 385.1718444824219, 351.78106689453125, 394.6317138671875], "page": 112, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-12 Exception join without RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/98"}, {"prov": [{"bbox": [136.073486328125, 322.1929931640625, 544.3384399414062, 368.567138671875], "page": 112, "span": [0, 343], "__ref_s3_data": null}], "text": "Given that row permission serves to eliminate logically rows from one or more sets, more rows can appear to be exceptions when RCAC is applied, as shown in Figure 6-13. Also, because column masks are applied after the query final results are determined, the masked value has no effect on the join processing and corresponding query result set.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.24554443359375, 60.59693908691406, 339.181640625, 69.93528747558594], "page": 112, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-13 Exception join with RCAC permission", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/99"}, {"prov": [{"bbox": [64.30990600585938, 27.93828010559082, 78.4020004272461, 37.535587310791016], "page": 112, "span": [0, 2], "__ref_s3_data": null}], "text": "96", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.766407012939453, 334.4214172363281, 37.29237747192383], "page": 112, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.58145904541016, 705.5357666015625, 469.4769287109375, 721.6285400390625], "page": 113, "span": [0, 50], "__ref_s3_data": null}], 
"text": "6.4 Monitoring, analyzing, and debugging with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.05673217773438, 655.0875854492188, 547.2247314453125, 689.4639892578125], "page": 113, "span": [0, 228], "__ref_s3_data": null}], "text": "It is assumed (and it is a critical success factor) that the database engineer or application developer has a thorough understanding of the DB2 for i Query Optimizer, Database Engine, and all the associated tools and techniques.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.80198669433594, 621.1078491210938, 547.1968383789062, 643.4088134765625], "page": 113, "span": [0, 163], "__ref_s3_data": null}], "text": "The monitoring, analyzing, and debugging process basically stays the same when RCAC row permissions or column masks are in place, with a few important differences:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.58511352539062, 592.2996215820312, 534.2526245117188, 614.6841430664062], "page": 113, "span": [0, 105], "__ref_s3_data": null}], "text": "GLYPH The underlying data access plan can be different and more complex based on the rule text.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.75184631347656, 575.1990966796875, 541.5543212890625, 585.4160766601562], "page": 113, "span": [0, 104], "__ref_s3_data": null}], "text": "GLYPH The database results can be reduced or modified based on the rule text and user profile.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.37033081054688, 546.2804565429688, 536.0465087890625, 568.6742553710938], "page": 113, "span": [0, 115], "__ref_s3_data": null}], "text": "GLYPH The run time of the request can be affected either positively or negatively based on the rule text.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5784149169922, 516.7872314453125, 547.224609375, 
539.462890625], "page": 113, "span": [0, 119], "__ref_s3_data": null}], "text": "GLYPH For high-level language record level access, query plans must be considered, and not just program code.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.09255981445312, 447.2820129394531, 547.2296752929688, 505.3416442871094], "page": 113, "span": [0, 381], "__ref_s3_data": null}], "text": "During analyzing and debugging, it is important to account for all of the RCAC definitions for each table or file to understand the logic and corresponding work that is associated with processing the row permissions and column masks. It is also important to realize that, depending on the user profile in effect at run time, the database actions and query results can be different.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.950927734375, 341.0733337402344, 547.2786254882812, 435.3935241699219], "page": 113, "span": [0, 693], "__ref_s3_data": null}], "text": "RCAC is designed and implemented to be transparent to the user. It is possible for user \"Mike\" and user \"Hernando\" to run the exact same query, against the exact same data on the exact same system, and get different result sets. There is no error, no warning, and no indication that RCAC reduced or modified the respective answers that are returned. Furthermore, it is also likely that user \"Mike\" and user \"Hernando\" have different query run times even though it appears that everything is the same for both users. 
The actual query plan contains the RCAC logic, and this additional code path can alter the amount of work that is needed to produce results, based on the user running the query.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.83775329589844, 271.2449035644531, 547.328369140625, 329.3033142089844], "page": 113, "span": [0, 414], "__ref_s3_data": null}], "text": "When monitoring, analyzing, and debugging a database process when RCAC is enabled, it is critical to keep as many of the \"variables\" the same as possible. Use a good scientific process. For example, when re-creating a problem situation running under the same user profile with the same data and under the same conditions, it is almost mandatory. Otherwise, the database behavior and query results can be different.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.03610229492188, 201.1825714111328, 547.2515869140625, 259.4459228515625], "page": 113, "span": [0, 385], "__ref_s3_data": null}], "text": "To successfully perform monitoring, analyzing, and debugging when RCAC is enabled likely involves changes in the security and data access policies of the organization, and require new responsibilities, authority, and oversight within the data-centric application development community. 
As such, establishing and staffing the position of \"database engineer\" becomes even more important.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.31684112548828, 168.40090942382812, 325.99066162109375, 181.35137939453125], "page": 113, "span": [0, 41], "__ref_s3_data": null}], "text": "6.4.1 Query monitoring and analysis tools", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.78477478027344, 109.23886108398438, 543.2037353515625, 155.49432373046875], "page": 113, "span": [0, 309], "__ref_s3_data": null}], "text": "When monitoring and collecting metrics on database requests, DB2 for i provides additional information that indicates row permissions or column masks are being applied. This information is integrated and part of the standard tools, such as Visual Explain, SQL Plan Cache Snapshot, and SQL Performance Monitor.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [376.31317138671875, 27.83702850341797, 523.6287841796875, 37.11963653564453], "page": 113, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6370849609375, 27.93828010559082, 547.2591552734375, 37.61383819580078], "page": 113, "span": [0, 2], "__ref_s3_data": null}], "text": "97", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.3003387451172, 710.93408203125, 394.5509033203125, 721.3932495117188], "page": 114, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 6-14 shows how Visual Explain externalizes RCAC.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [64.32495880126953, 430.2875671386719, 301.67059326171875, 439.9020690917969], "page": 114, "span": [0, 58], "__ref_s3_data": null}], "text": "Figure 6-14 Visual Explain indicating that RCAC is applied", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/100"}, {"prov": [{"bbox": [136.37713623046875, 403.63275146484375, 529.9888916015625, 413.7968444824219], "page": 114, "span": [0, 83], "__ref_s3_data": null}], "text": "Figure 6-15 shows the main dashboard of an SQL Performance Monitor. 
Click Summary .", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [64.45510864257812, 237.89434814453125, 223.10508728027344, 247.1595458984375], "page": 114, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-15 SQL Performance Monitor", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/101"}, {"prov": [{"bbox": [136.24908447265625, 199.04591369628906, 524.7570190429688, 221.4222412109375], "page": 114, "span": [0, 100], "__ref_s3_data": null}], "text": "Figure 6-16 shows the summary of an SQL Performance Monitor with an indication that RCAC is applied.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [64.33348846435547, 94.5608901977539, 349.3365173339844, 103.83731079101562], "page": 114, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 6-16 SQL Performance Monitor indicating that RCAC is applied", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/102"}, {"prov": [{"bbox": [64.26004028320312, 27.93828010559082, 78.4020004272461, 37.59291458129883], "page": 114, "span": [0, 2], "__ref_s3_data": null}], "text": "98", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.39844512939453, 27.74201011657715, 334.4214172363281, 37.295169830322266], "page": 114, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.0850372314453, 699.2781372070312, 514.509765625, 721.4401245117188], "page": 115, "span": [0, 92], "__ref_s3_data": null}], "text": "Figure 6-17 shows the statements of an SQL Performance Monitor and how RCAC is externalized.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [64.54953002929688, 562.5570068359375, 349.6691589355469, 571.9047241210938], "page": 115, "span": [0, 63], 
"__ref_s3_data": null}], "text": "Figure 6-17 SQL Performance Monitor showing statements and RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/103"}, {"prov": [{"bbox": [135.9917755126953, 463.65936279296875, 547.1959838867188, 546.1871948242188], "page": 115, "span": [0, 613], "__ref_s3_data": null}], "text": "When implementing RCAC as part of a comprehensive and pervasive data access control initiative, consider that the database monitoring and analysis tools can collect literal values that are passed as part of SQL statements. These literal values can be viewed as part of the information collected. If any of the literals are based on or are used with masked columns, it is important to review the database engineer's policy for viewing these data elements. For example, supposed that column CUSTOMER_TAX_ID is deemed masked for the database engineer and the CUSTOMER_TAX_ID column is used in a predicate as follows:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.02809143066406, 445.86871337890625, 321.6575622558594, 456.53887939453125], "page": 115, "span": [0, 37], "__ref_s3_data": null}], "text": "WHERE CUSTOMER_TAX_ID = '123-45-7890'", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.9283447265625, 400.6603698730469, 520.1124877929688, 434.6978454589844], "page": 115, "span": [0, 195], "__ref_s3_data": null}], "text": "The literal value of '123-45-7890' is visible to the analyst, effectively exposing sensitive information. 
If this is not acceptable, you must implement the SYSPROC.SET_COLUMN_ATTRIBUTE procedure.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.99343872070312, 354.4087829589844, 547.264404296875, 388.86968994140625], "page": 115, "span": [0, 217], "__ref_s3_data": null}], "text": "The SET_COLUMN_ATTRIBUTE procedure sets the SECURE attribute for a column so that variable values that are used for the column cannot be seen in the SQL Performance Monitor, SQL Plan Cache Snapshot, or Visual Explain.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.30596160888672, 322.03729248046875, 184.44000244140625, 334.6505432128906], "page": 115, "span": [0, 19], "__ref_s3_data": null}], "text": "6.4.2 Index advisor", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0715789794922, 274.205810546875, 544.1452026367188, 308.6105651855469], "page": 115, "span": [0, 265], "__ref_s3_data": null}], "text": "Because the RCAC rule text can be almost any valid SQL logic, including local selection predicates, join conditions, and subqueries, the standard query tuning techniques still apply. Without a doubt, a proper and adequate indexing strategy is a good starting point.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9353485107422, 204.24913024902344, 543.59814453125, 262.8050231933594], "page": 115, "span": [0, 434], "__ref_s3_data": null}], "text": "The index advisor is not specifically enhanced for RCAC, but because the rule text is a fully integrated part of the query plan, any opportunities for indexing is advised based on the current Query Optimizer functionality. If an index is advised because of the RCAC rule text logic, there is no RCAC reason code provided. 
Analyzing the query plan and the RCAC rule text provides the understanding as to why the index is being advised.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [376.3026428222656, 27.829248428344727, 523.6287841796875, 37.070919036865234], "page": 115, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.6923828125, 27.93828010559082, 547.2591552734375, 37.61021041870117], "page": 115, "span": [0, 2], "__ref_s3_data": null}], "text": "99", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.1690673828125, 698.9784545898438, 529.2249145507812, 721.1625366210938], "page": 116, "span": [0, 122], "__ref_s3_data": null}], "text": "For example, the query that is shown in Figure 6-18 produces index advice for the user's predicate and the RCAC predicate.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9779510498047, 401.4017639160156, 286.63140869140625, 410.4287109375], "page": 116, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-18 Index advice and RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/104"}, {"prov": [{"bbox": [136.1080322265625, 362.8584899902344, 530.6282958984375, 384.9075927734375], "page": 116, "span": [0, 116], "__ref_s3_data": null}], "text": "In Figure 6-19, index advisor is showing an index for the ACCOUNTS and CUSTOMERS tables based on the RCAC rule text.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.38304901123047, 225.15142822265625, 271.9134216308594, 234.57513427734375], "page": 116, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-19 Index advisor based on the RCAC rule", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/105"}, {"prov": [{"bbox": 
[136.2445526123047, 186.40867614746094, 545.009521484375, 208.71290588378906], "page": 116, "span": [0, 116], "__ref_s3_data": null}], "text": "For more information about creating and using indexes, see IBM DB2 for i indexing methods and strategies , found at:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0568389892578, 157.3316192626953, 546.534423828125, 179.93490600585938], "page": 116, "span": [0, 108], "__ref_s3_data": null}], "text": "http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i_in dexing_methods_strategies", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.29796600341797, 124.90902709960938, 251.73373413085938, 138.01373291015625], "page": 116, "span": [0, 29], "__ref_s3_data": null}], "text": "6.4.3 Metadata using catalogs", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.81069946289062, 65.56778717041016, 519.360595703125, 112.09072875976562], "page": 116, "span": [0, 281], "__ref_s3_data": null}], "text": "To make the discovery and identification of RCAC row permissions and column masks programmatically, query the QSYS2.SYSCONTROLS catalog view or the QSYS2.SYSCONTROLSDEP catalog view directly. 
Otherwise, the System i Navigator Database graphical interface can be used interactively.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.58773803710938, 27.93828010559082, 83.98200225830078, 37.618370056152344], "page": 116, "span": [0, 3], "__ref_s3_data": null}], "text": "100", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.61930084228516, 27.79102897644043, 339.92132568359375, 37.225154876708984], "page": 116, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.2389678955078, 711.0005493164062, 409.46722412109375, 721.5032958984375], "page": 117, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 6-20 shows the QSYS2.SYSCONTROLS catalog view.", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.65972137451172, 536.8526000976562, 197.23672485351562, 546.6951904296875], "page": 117, "span": [0, 29], "__ref_s3_data": null}], "text": "Figure 6-20 RCAC and catalogs", "type": "paragraph", "name": "paragraph", "font": null}, {"prov": [{"bbox": [136.26268005371094, 510.36981201171875, 430.36700439453125, 520.7019653320312], "page": 117, "span": [0, 60], "__ref_s3_data": null}], "text": "The SYSCONTROLS catalog view contains the following columns:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.75210571289062, 493.5987243652344, 229.3837890625, 503.4615173339844], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.86070251464844, 481.5989074707031, 231.54153442382812, 491.64453125], "page": 117, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH CONTROL_TYPE", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6691436767578, 469.5990905761719, 
219.97596740722656, 479.86834716796875], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH CREATE_TIME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7273712158203, 457.5992736816406, 190.9468536376953, 467.8027648925781], "page": 117, "span": [0, 22], "__ref_s3_data": null}], "text": "GLYPH ENABLE", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.64234924316406, 445.5994567871094, 207.2530517578125, 455.4942626953125], "page": 117, "span": [0, 24], "__ref_s3_data": null}], "text": "GLYPH ENFORCED", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.51791381835938, 433.5996398925781, 220.03372192382812, 443.4760437011719], "page": 117, "span": [0, 26], "__ref_s3_data": null}], "text": "GLYPH ASP_NUMBER", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5097198486328, 421.5998229980469, 193.41262817382812, 431.81707763671875], "page": 117, "span": [0, 24], "__ref_s3_data": null}], "text": "GLYPH IMPLICIT", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7074737548828, 409.6000061035156, 182.29428100585938, 419.560302734375], "page": 117, "span": [0, 21], "__ref_s3_data": null}], "text": "GLYPH LABEL", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7837677001953, 397.6001892089844, 226.72982788085938, 407.70257568359375], "page": 117, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH LAST_ALTERED", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5884246826172, 385.6003723144531, 236.8487548828125, 395.9312744140625], "page": 117, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH LONG_COMMENT", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6427001953125, 373.6005554199219, 213.68124389648438, 383.6861877441406], "page": 117, "span": [0, 25], "__ref_s3_data": null}], "text": 
"GLYPH RCAC_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4962921142578, 361.6007385253906, 222.89993286132812, 372.0569763183594], "page": 117, "span": [0, 26], "__ref_s3_data": null}], "text": "GLYPH RCAC_OWNER", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.67913818359375, 349.6009216308594, 227.64552307128906, 359.7144470214844], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH RCAC_SCHEMA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7525634765625, 337.6011047363281, 202.9622802734375, 347.8081970214844], "page": 117, "span": [0, 24], "__ref_s3_data": null}], "text": "GLYPH RULETEXT", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.67474365234375, 325.6012878417969, 275.5697021484375, 335.66265869140625], "page": 117, "span": [0, 34], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.66769409179688, 313.6014709472656, 262.74969482421875, 323.5975341796875], "page": 117, "span": [0, 33], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.64051818847656, 301.6016540527344, 276.5843505859375, 311.68450927734375], "page": 117, "span": [0, 35], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_TABLE_SCHEMA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7092742919922, 289.6018371582031, 216.03579711914062, 299.60284423828125], "page": 117, "span": [0, 26], "__ref_s3_data": null}], "text": "GLYPH TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.76608276367188, 277.6020202636719, 230.14419555664062, 287.8016052246094], "page": 117, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH TABLE_SCHEMA", "type": "paragraph", "name": "List-item", 
"font": null}, {"prov": [{"bbox": [135.6082763671875, 265.6022033691406, 235.10260009765625, 275.61505126953125], "page": 117, "span": [0, 29], "__ref_s3_data": null}], "text": "GLYPH TBCORRELATION", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.31414794921875, 242.9756622314453, 451.0119934082031, 253.38050842285156], "page": 117, "span": [0, 63], "__ref_s3_data": null}], "text": "The SYSCONTROLSDEP catalog view contains the following columns:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.60992431640625, 226.60281372070312, 229.24020385742188, 236.86697387695312], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH COLUMN_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.8683319091797, 214.60301208496094, 231.54153442382812, 224.8516845703125], "page": 117, "span": [0, 28], "__ref_s3_data": null}], "text": "GLYPH CONTROL_TYPE", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.71621704101562, 202.60321044921875, 222.8195343017578, 212.88685607910156], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH IASP_NUMBER", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.70372009277344, 190.60340881347656, 225.03065490722656, 201.20953369140625], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH OBJECT_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7957763671875, 178.60360717773438, 239.2458953857422, 189.06430053710938], "page": 117, "span": [0, 29], "__ref_s3_data": null}], "text": "GLYPH OBJECT_SCHEMA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.60643005371094, 166.6038055419922, 222.27175903320312, 177.22776794433594], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH OBJECT_TYPE", "type": "paragraph", "name": "List-item", "font": null}, {"prov": 
[{"bbox": [135.5543670654297, 154.60400390625, 241.48854064941406, 165.41989135742188], "page": 117, "span": [0, 30], "__ref_s3_data": null}], "text": "GLYPH PARM_SIGNATURE", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.70533752441406, 142.6042022705078, 213.68124389648438, 153.0066375732422], "page": 117, "span": [0, 25], "__ref_s3_data": null}], "text": "GLYPH RCAC_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7023162841797, 130.60440063476562, 227.54257202148438, 140.852783203125], "page": 117, "span": [0, 27], "__ref_s3_data": null}], "text": "GLYPH RCAC_SCHEMA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.80105590820312, 118.6045913696289, 262.728271484375, 129.09344482421875], "page": 117, "span": [0, 33], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_TABLE_NAME", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.57916259765625, 106.60478210449219, 276.6519470214844, 117.10379791259766], "page": 117, "span": [0, 35], "__ref_s3_data": null}], "text": "GLYPH SYSTEM_TABLE_SCHEMA", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.38717651367188, 84.58521270751953, 495.9486389160156, 94.74239349365234], "page": 117, "span": [0, 81], "__ref_s3_data": null}], "text": "For more information, see the IBM i 7.2 DB2 for i SQL Reference Guide , found at:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.96670532226562, 55.894439697265625, 546.534423828125, 78.2451400756836], "page": 117, "span": [0, 86], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?lang =en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.9178161621094, 28.03982925415039, 517.9691772460938, 37.072052001953125], "page": 117, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.4176025390625, 27.93828010559082, 547.2587890625, 37.66716003417969], "page": 117, "span": [0, 3], "__ref_s3_data": null}], "text": "101", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.68630981445312, 687.0557861328125, 524.18310546875, 721.531005859375], "page": 118, "span": [0, 65], "__ref_s3_data": null}], "text": "6.5 Views, materialized query tables, and query rewrite with RCAC", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.02333068847656, 648.2785034179688, 538.62841796875, 670.8335571289062], "page": 118, "span": [0, 133], "__ref_s3_data": null}], "text": "This section covers the implications to views, materialized query tables (MQTs), and query rewrite when RCAC is activated on a table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.40193939208984, 615.67724609375, 137.4498748779297, 628.7596435546875], "page": 118, "span": [0, 11], "__ref_s3_data": null}], "text": "6.5.1 Views", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9208526611328, 544.2852172851562, 547.2675170898438, 602.7634887695312], "page": 118, "span": [0, 453], "__ref_s3_data": null}], "text": "Any access to an SQL view that is over one or more tables that have RCAC also have those row permissions and column masking rules applied. If an SQL view has predicates, those are logically ANDed with any search condition that is specified in the permissions that are defined on the underlying tables. The view does not have to project the columns that are referenced by the permissions. 
Figure 6-21 shows an example of a view definition and user query.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.18788146972656, 249.9243621826172, 311.72760009765625, 259.39349365234375], "page": 118, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-21 View definition and user query", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/106"}, {"prov": [{"bbox": [64.70126342773438, 27.93828010559082, 83.98200225830078, 37.50507354736328], "page": 118, "span": [0, 3], "__ref_s3_data": null}], "text": "102", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.75050354003906, 27.767614364624023, 339.8848571777344, 37.27764892578125], "page": 118, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.91015625, 698.9927368164062, 519.4752807617188, 721.2984008789062], "page": 119, "span": [0, 97], "__ref_s3_data": null}], "text": "What the query optimizer plans for and what the database engine runs is shown in the Figure 6-22.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.19911193847656, 392.20526123046875, 291.4627990722656, 402.051513671875], "page": 119, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-22 Query rewrite with RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/107"}, {"prov": [{"bbox": [64.50540161132812, 360.3772888183594, 255.48699951171875, 373.21600341796875], "page": 119, "span": [0, 31], "__ref_s3_data": null}], "text": "6.5.2 Materialized query tables", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.82192993164062, 312.9986572265625, 547.2784423828125, 347.3133239746094], "page": 119, "span": [0, 271], "__ref_s3_data": null}], 
"text": "When the query to populate a materialized query table (MQT) is run by the system on either the create table or a refresh table, and one or more source tables have RCAC defined, the row permissions and column masks are ignored. This means that the MQT has all of the data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.07936096191406, 266.9794616699219, 547.2845458984375, 301.3565673828125], "page": 119, "span": [0, 242], "__ref_s3_data": null}], "text": "Because the MQT is a copy of the base table data, when a permission is created on the base table, all the related MQTs are altered to have a default row permission. This default permission prevents any of the rows from being directly queried.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.00421142578125, 220.8664093017578, 547.2724609375, 255.15774536132812], "page": 119, "span": [0, 266], "__ref_s3_data": null}], "text": "When a query implicitly uses an MQT, the underlying row permissions and column masks are built into the query that uses the MQT. In order for the MQT to be used for optimization, the MQT must include any columns that are used by the row permissions and column masks.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.21084594726562, 199.00047302246094, 342.15032958984375, 209.267333984375], "page": 119, "span": [0, 48], "__ref_s3_data": null}], "text": "The following example illustrates this scenario:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80001831054688, 181.96090698242188, 270.4134826660156, 191.80789184570312], "page": 119, "span": [0, 28], "__ref_s3_data": null}], "text": "1. 
Create schema and tables:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.20018005371094, 165.2699432373047, 266.09869384765625, 174.04469299316406], "page": 119, "span": [0, 22], "__ref_s3_data": null}], "text": "CREATE SCHEMA Schema1;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.79075622558594, 141.2703399658203, 547.2555541992188, 163.08201599121094], "page": 119, "span": [0, 89], "__ref_s3_data": null}], "text": "CREATE TABLE Schema1.employee(userID varchar(128), LocationID integer, Regionid integer);", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.46888732910156, 117.27072143554688, 531.0546264648438, 139.3306121826172], "page": 119, "span": [0, 135], "__ref_s3_data": null}], "text": "CREATE TABLE Schema1.Sales (INVOICE INTEGER NOT NULL, SALEAMT DECIMAL(5,2), TAXAMT DECIMAL(5,2), LOCATIONID INTEGER, REGIONID INTEGER);", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.84039306640625, 28.06173324584961, 517.9691772460938, 37.0665397644043], "page": 119, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. 
Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.435302734375, 27.93828010559082, 547.2587890625, 37.50189971923828], "page": 119, "span": [0, 3], "__ref_s3_data": null}], "text": "103", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.5824203491211, 27.93828010559082, 83.98200225830078, 37.50394058227539], "page": 120, "span": [0, 3], "__ref_s3_data": null}], "text": "104", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.59403228759766, 27.74925994873047, 339.9430236816406, 37.371910095214844], "page": 120, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.20985412597656, 699.2785034179688, 545.8660888671875, 721.5411376953125], "page": 120, "span": [0, 99], "__ref_s3_data": null}], "text": "2. Create a row permission that allows the employees to see only rows from the region they work in:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.23133850097656, 598.3897705078125, 547.1957397460938, 692.9283447265625], "page": 120, "span": [0, 281], "__ref_s3_data": null}], "text": "/* Create permission that only allows the employees to see rows from the region they work in */ CREATE PERMISSION Schema1.Sales_PERM1 ON schema1.sales FOR ROWS WHERE CURRENT_USER in (SELECT userId FROM schema1.employee E WHERE e.regionid = regionid) ENFORCED FOR ALL ACCESS ENABLE;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.14837646484375, 580.844482421875, 362.0214538574219, 591.3262939453125], "page": 120, "span": [0, 48], "__ref_s3_data": null}], "text": "3. 
Create an MQT to summarize sales by location:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.20016479492188, 480.57073974609375, 545.9945678710938, 573.34423828125], "page": 120, "span": [0, 307], "__ref_s3_data": null}], "text": "-- Create MQT to summarize sales by location -- This has all of the data. The schema1.sales_perm1 predicate was not applied CREATE TABLE Schema1.Location_Sales_MQT as AS (SELECT LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.27374267578125, 468.5709228515625, 251.09893798828125, 478.4224548339844], "page": 120, "span": [0, 19], "__ref_s3_data": null}], "text": "MAINTAINED BY USER;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.06195068359375, 450.712158203125, 354.3462829589844, 461.3641052246094], "page": 120, "span": [0, 48], "__ref_s3_data": null}], "text": "4. 
Populate the MQT (permission is not applied):", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.20016479492188, 422.5517272949219, 416.03656005859375, 443.3263244628906], "page": 120, "span": [0, 93], "__ref_s3_data": null}], "text": "/* Populate the MQT - Permission not applied here */ REFRESH TABLE Schema1.Location_Sales_MQT", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.42251586914062, 392.75518798828125, 547.1997680664062, 415.1828308105469], "page": 120, "span": [0, 166], "__ref_s3_data": null}], "text": "The following query matches Location_Sales_MQT, but it cannot be used because it does not have column regionid, which is needed by the schema1.sales_PERM1 permission:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [151.20016479492188, 364.5924987792969, 401.0367736816406, 385.3670959472656], "page": 120, "span": [0, 71], "__ref_s3_data": null}], "text": "SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.57110595703125, 347.032958984375, 385.903564453125, 357.5345153808594], "page": 120, "span": [0, 53], "__ref_s3_data": null}], "text": "5. 
Create an MQT to summarize by region and location:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.25201416015625, 246.57443237304688, 500.9953308105469, 340.11907958984375], "page": 120, "span": [0, 273], "__ref_s3_data": null}], "text": "-- MQT to summarize by region and location Create table schema1.Region_Location_Sales_MQT as AS (SELECT REGIONID, LocationID, SUM(Saleamt) as Total_Location_Sales FROM SCHEMA1.SALES GROUP BY REGIONID, LOCATIONID) DATA INITIALLY DEFERRED REFRESH DEFERRED MAINTAINED BY USER;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [136.15359497070312, 228.8771514892578, 452.1078186035156, 239.49000549316406], "page": 120, "span": [0, 67], "__ref_s3_data": null}], "text": "6. Populate the Region_location_Sales_MQT (permission not applied):", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.35955810546875, 199.56198120117188, 535.9747924804688, 222.3959503173828], "page": 120, "span": [0, 122], "__ref_s3_data": null}], "text": "/* Populate the Region_location_Sales_MQT - Permission not applied here */ Refresh table schema1.Region_Location_Sales_MQT", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [150.356689453125, 171.1383056640625, 502.06903076171875, 193.5333251953125], "page": 120, "span": [0, 140], "__ref_s3_data": null}], "text": "The following query can use the Region_location_SALES_MQT because it has REGIONID, which is required for the schema1.sales_PERM1 permission:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [151.20018005371094, 142.53627014160156, 401.0367736816406, 163.31082153320312], "page": 120, "span": [0, 71], "__ref_s3_data": null}], "text": "SELECT Locationid, sum(SALEAMT) FROM schema1.sales GROUP BY locationid;", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.85989379882812, 710.8369750976562, 376.0711669921875, 721.2571411132812], "page": 121, "span": 
[0, 55], "__ref_s3_data": null}], "text": "This example has the following additional implications:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5341796875, 670.2985229492188, 547.2718505859375, 704.5060424804688], "page": 121, "span": [0, 199], "__ref_s3_data": null}], "text": "GLYPH Users must be prevented from explicitly querying the MQT or a view that is created over it. Those two cases bypass the row permission and column mask rules from the underlying tables.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.635986328125, 616.6964721679688, 547.3106079101562, 663.2953491210938], "page": 121, "span": [0, 300], "__ref_s3_data": null}], "text": "GLYPH If the user writes code to update incrementally an MQT, that code must be run from a user that has permission to view all of the rows and all columns in their unmasked state. Otherwise, the MQT contents are not complete and queries that implicitly use the MQT might get wrong results.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.70452880859375, 588.2799072265625, 539.1951904296875, 610.5258178710938], "page": 121, "span": [0, 126], "__ref_s3_data": null}], "text": "GLYPH To prevent this, a check constraint can be created to cause an error if masked data was inserted into the MQT.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.31158447265625, 555.6721801757812, 184.48561096191406, 568.6481323242188], "page": 121, "span": [0, 19], "__ref_s3_data": null}], "text": "6.5.3 Query rewrite", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.25848388671875, 520.2984619140625, 527.1223754882812, 542.4752197265625], "page": 121, "span": [0, 110], "__ref_s3_data": null}], "text": "Query rewrite is a technique that the optimizer can use to change the original request to improve performance.", "type": "paragraph", "name": "Text", "font": null}, 
{"prov": [{"bbox": [136.0530548095703, 450.2796325683594, 547.158935546875, 508.5531311035156], "page": 121, "span": [0, 401], "__ref_s3_data": null}], "text": "For example, a query that references Table1 might be rewritten to access an MQT over Table1, or it might also be optimized to access only the fields in an index that is defined over Table1 and avoid touching Table1. With RCAC, defining these rewrites can still occur, but the MQT or index also must include all columns that are needed by the row permissions or column masks that are defined on Table1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1699981689453, 403.54132080078125, 547.3839721679688, 438.06658935546875], "page": 121, "span": [0, 197], "__ref_s3_data": null}], "text": "As part of adding RCAC, the impact to these potentially significant performance optimizations must be considered. Usage of MQTs or index-only access might be reduced or eliminated by enabling RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.51546478271484, 360.6579895019531, 436.9425048828125, 376.25299072265625], "page": 121, "span": [0, 47], "__ref_s3_data": null}], "text": "6.6 RCAC effects on performance and scalability", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.87310791015625, 249.67189025878906, 547.3291625976562, 344.3325500488281], "page": 121, "span": [0, 696], "__ref_s3_data": null}], "text": "As with any discussion that is related to performance and scalability, nothing is certain or guaranteed. There are always many variables that are involved. First, a good foundation of knowledge and skill is required to appreciate fully what is occurring when a database request is handled within an RCAC enabled environment. Implementing the row permission or column masks involves the query optimizer and database engine. 
The process that identifies the rows that you have permission to access is considered a \"query\", and as such a query plan must be formulated. In the case of SQL requests, the RCAC portion of the query is combined with the user's query, much like a query referencing a view.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.00497436523438, 180.19772338867188, 547.2525634765625, 238.8109130859375], "page": 121, "span": [0, 455], "__ref_s3_data": null}], "text": "For native record level access, this RCAC \"query\" is also built and used to test the permission. When a file is opened, the RCAC rule text logic is included, optimized, and run as part of the native read, write, update, or delete operation. The amount of work (and time) required to identify the record based on the user's permission is directly related to the complexity and depth of the logic that is needed to identify the records that can be returned.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.76473999023438, 73.70987701416016, 547.2844848632812, 167.78012084960938], "page": 121, "span": [0, 698], "__ref_s3_data": null}], "text": "A simple example to illustrate this concept is a random read using a keyed logical file (that is, an index). In its purest form, a random read uses two data access methods: index probe (find the key and RRN) and table probe (find the record using RRN). If the RCAC rule text specifies five nested subqueries to determine whether the user has access to the record, this logic must be added to the path. The subquery processing now becomes part of the original \"random read\" request. Instead of two simple I/Os to retrieve the record, there can be a minimum of 12 I/Os to retrieve the same record. 
These I/Os can be done with a result of \"not found\" if the user is not entitled to any of the records.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.8880615234375, 28.07004165649414, 517.9691772460938, 37.09161376953125], "page": 121, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.3849487304688, 27.93828010559082, 547.2587890625, 37.579017639160156], "page": 121, "span": [0, 3], "__ref_s3_data": null}], "text": "105", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.63807678222656, 27.93828010559082, 83.98200225830078, 37.42599105834961], "page": 122, "span": [0, 3], "__ref_s3_data": null}], "text": "106", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.7645034790039, 27.739063262939453, 339.9179382324219, 37.333919525146484], "page": 122, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.05857849121094, 674.9677124023438, 543.5155029296875, 721.5306396484375], "page": 122, "span": [0, 285], "__ref_s3_data": null}], "text": "For programs that access records sequentially, in or out of key order, the added RCAC logic can have a profound effect on the performance and scalability. 
Reading the \"next record\" in order is no longer a simple matter of positioning to the next available key, as shown in Figure 6-23.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3397216796875, 378.350341796875, 333.39794921875, 388.2008972167969], "page": 122, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-23 Native record access with no RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/108"}, {"prov": [{"bbox": [136.0682373046875, 674.9595336914062, 547.295654296875, 721.4103393554688], "page": 123, "span": [0, 305], "__ref_s3_data": null}], "text": "Before the record, as identified by the key, is considered available, the RCAC logic must be run. If the record is rejected by RCAC, the next record in sequence that is permissible must be identified. This spinning through the records can take a long time and uses many resources, as shown in Figure 6-24.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.14874267578125, 374.3591613769531, 341.5462951660156, 383.96697998046875], "page": 123, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-24 Native record level access with RCAC", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/109"}, {"prov": [{"bbox": [136.06741333007812, 335.3708801269531, 525.8615112304688, 357.78802490234375], "page": 123, "span": [0, 134], "__ref_s3_data": null}], "text": "After the row permissions and column masks are designed and implemented, adequate performance and scalability testing are recommended.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.41442108154297, 292.18603515625, 510.04888916015625, 308.5864562988281], "page": 123, "span": [0, 58], "__ref_s3_data": null}], "text": "6.7 Exclusive lock to implement RCAC (availability issues)", "type": "subtitle-level-1", "name": "Section-header", "font": 
null}, {"prov": [{"bbox": [135.7480926513672, 242.07867431640625, 547.2496948242188, 276.0059814453125], "page": 123, "span": [0, 205], "__ref_s3_data": null}], "text": "When defining permissions or enabling RCAC, an exclusive lock on the base table is obtained. The impact to other applications depends on the order of create permission and the alter table to activate RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.33482360839844, 219.52285766601562, 283.20501708984375, 230.26702880859375], "page": 123, "span": [0, 33], "__ref_s3_data": null}], "text": "Consider the following scenarios:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.61669921875, 203.0792999267578, 464.85845947265625, 213.4561004638672], "page": 123, "span": [0, 84], "__ref_s3_data": null}], "text": "GLYPH Scenario 1: Adding permissions and RCAC is not enabled on the table:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.18515014648438, 174.03993225097656, 547.4009399414062, 196.03309631347656], "page": 123, "span": [0, 106], "__ref_s3_data": null}], "text": "-Job 1 reading data from the table (open for input) holds a *SHRRD on the member and a *SHRRD on the data.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.15493774414062, 145.06031799316406, 546.0185546875, 167.4003143310547], "page": 123, "span": [0, 126], "__ref_s3_data": null}], "text": "-Job 2 adding, updating, or deleting rows from table (open for output) holds a *SHRRD on the member and a *SHRUPD on the data.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.300537109375, 127.89167022705078, 546.8192749023438, 138.0634307861328], "page": 123, "span": [0, 84], "__ref_s3_data": null}], "text": "-Job 4 allocates the object and gets a *SHRRD on the file and a *EXCLRD on the data.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.9271697998047, 
87.04132080078125, 547.1649169921875, 121.13821411132812], "page": 123, "span": [0, 196], "__ref_s3_data": null}], "text": "-Job 3 attempts to add a permission to the table. Permission is added and the pseudo-closed cursors for Job1 and Job 2 are closed. Job 4 still holds the *SHRRD on the file and *EXCLRD on the data.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.55613708496094, 57.62049102783203, 545.1102905273438, 80.00410461425781], "page": 123, "span": [0, 134], "__ref_s3_data": null}], "text": "The net result from Scenario 1 is that you can add permissions without having to end the applications that are reading the base table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.8280334472656, 28.034542083740234, 517.9691772460938, 37.04221725463867], "page": 123, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.3724975585938, 27.93828010559082, 547.2587890625, 37.605224609375], "page": 123, "span": [0, 3], "__ref_s3_data": null}], "text": "107", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.501953125, 699.1420288085938, 547.2257080078125, 721.4761352539062], "page": 124, "span": [0, 171], "__ref_s3_data": null}], "text": "GLYPH Scenario 2: Altering a table to activate RCAC requires that all applications using the table be ended. The alter table requires exclusive use of the table.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6831512451172, 645.1512451171875, 547.350341796875, 692.8861694335938], "page": 124, "span": [0, 375], "__ref_s3_data": null}], "text": "GLYPH Scenario 3: Altering the table to activate RCAC before the permissions are added. The alter table requires exclusive use of the table, as in scenario 2. All applications must be ended to perform this alter. 
After the alter is complete, any applications trying to read data do not get any results, and attempts to insert new rows returns the following message:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.52105712890625, 629.384521484375, 451.01605224609375, 639.4539184570312], "page": 124, "span": [0, 59], "__ref_s3_data": null}], "text": "SQ20471] INSERT or UPDATE does not satisfy row permissions.", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [150.43138122558594, 588.2802734375, 532.7249145507812, 622.7317504882812], "page": 124, "span": [0, 239], "__ref_s3_data": null}], "text": "To create a permission in this case requires that you end all the applications, unlike scenario 1 where permissions can be added while the applications were active. In this case, the applications must be ended to run the create permission.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.41513061523438, 544.8763427734375, 380.354736328125, 561.1818237304688], "page": 124, "span": [0, 39], "__ref_s3_data": null}], "text": "6.8 Avoiding propagation of masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.85028076171875, 494.2587890625, 547.30224609375, 528.1173706054688], "page": 124, "span": [0, 275], "__ref_s3_data": null}], "text": "Operations such as insert or update into a table with active column access control can fail if the input data is masked data. 
This can happen when data to be inserted or updated contains the masked value as a result of a SELECT from a table with active column access control.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.38754272460938, 460.2991638183594, 547.1968383789062, 482.6835632324219], "page": 124, "span": [0, 178], "__ref_s3_data": null}], "text": "For example, assume TABLE1 and TABLE2 have active column access control and for insert, selecting data from TABLE2 returns the masked data. The following INSERT returns an error:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.7989959716797, 443.54840087890625, 331.6763000488281, 452.850341796875], "page": 124, "span": [0, 39], "__ref_s3_data": null}], "text": "INSERT INTO TABLE1 SELECT * FROM TABLE2", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.99356079101562, 408.8760070800781, 533.7767333984375, 431.30462646484375], "page": 124, "span": [0, 152], "__ref_s3_data": null}], "text": "The masked data that is returned from the SELECT * FROM TABLE2 might not be valid input data for TABLE1 because of data type or column check constraint.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.01171875, 374.921630859375, 532.6522827148438, 397.3339538574219], "page": 124, "span": [0, 114], "__ref_s3_data": null}], "text": "There are two ways to prevent this situation from happening: Define a check constraint or create a before trigger.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.31849670410156, 342.6772766113281, 260.1020202636719, 355.5576477050781], "page": 124, "span": [0, 31], "__ref_s3_data": null}], "text": "6.8.1 Check constraint solution", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.2911376953125, 319.0434875488281, 416.498779296875, 329.2791442871094], "page": 124, "span": [0, 64], "__ref_s3_data": null}], "text": "One way to prevent this 
problem is to define a check constraint.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.77337646484375, 249.27944946289062, 547.2566528320312, 307.6162109375], "page": 124, "span": [0, 382], "__ref_s3_data": null}], "text": "As part of RCAC, new SQL syntax is provided to allow an action to be performed when a violation of the check constraints check condition occurs instead of giving that error. However, if the check condition is still not met after the action, a hard error is returned. A check constraint with the new on-violation-clause is allowed on both the CREATE TABLE and ALTER TABLE statements.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9890899658203, 203.26028442382812, 547.2803955078125, 237.4848175048828], "page": 124, "span": [0, 217], "__ref_s3_data": null}], "text": "In the Example 6-4, the mask is defined to return a value of 'XXX-XX-nnnn' for any query that is not done by a user profile in the DBMGR group. The constraint checks that the column SSN does not have the masked value.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.4429702758789, 181.8981170654297, 277.19195556640625, 191.441650390625], "page": 124, "span": [0, 49], "__ref_s3_data": null}], "text": "Example 6-4 Check constraint to avoid masked data", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.3324966430664, 54.632633209228516, 414.59515380859375, 173.83340454101562], "page": 124, "span": [0, 297], "__ref_s3_data": null}], "text": "CREATE SCHEMA MY_LIB SET SCHEMA MY_LIB CREATE TABLE MY_LIB.EMP_INFO (COL1_name CHAR(10) WITH DEFAULT 'DEFAULT', COL2_ssn CHAR(11) WITH DEFAULT 'DEFAULT') CREATE MASK MASK_ssn ON MY_LIB.EMP_INFO FOR COLUMN COL2_ssn RETURN CASE WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBMGR' ) = 1 THEN COL2_ssn", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.66724395751953, 27.93828010559082, 
83.98200225830078, 37.50567626953125], "page": 124, "span": [0, 3], "__ref_s3_data": null}], "text": "108", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.35574340820312, 27.725317001342773, 339.9448547363281, 37.34313201904297], "page": 124, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.85445022583008, 599.5280151367188, 545.2154541015625, 721.5641479492188], "page": 125, "span": [0, 418], "__ref_s3_data": null}], "text": "ELSE 'XXX-XX-'||SUBSTR(COL2_ssn,8,4) END ENABLE | /* Check constraint for the update and insert.*/ ALTER TABLE MY_LIB.EMP_INFO ADD CONSTRAINT MASK_ssn_preserve CHECK(SUBSTR(COL2_ssn,1,7)<>'XXX-XX-') -- Allow any value other than the mask ON UPDATE VIOLATION PRESERVE COL2_ssn -- Don't update the mask portion of the existing value ON INSERT VIOLATION SET COL2_ssn = DEFAULT -- for insert set this to the default value.", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.34185028076172, 563.6049194335938, 240.5440673828125, 576.7313842773438], "page": 125, "span": [0, 29], "__ref_s3_data": null}], "text": "6.8.2 Before trigger solution", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.21571350097656, 528.1804809570312, 547.3193359375, 550.7018432617188], "page": 125, "span": [0, 171], "__ref_s3_data": null}], "text": "The actions that are described in Example 6-4 on page 108 for ON UPDATE VIOLATION and ON INSERT VIOLATION also can be handled by a before trigger, as shown in Example 6-5.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.4420166015625, 505.8026428222656, 336.79412841796875, 516.0908203125], "page": 125, "span": [0, 47], "__ref_s3_data": null}], "text": "Example 6-5 Before trigger to avoid masked data", "type": "subtitle-level-1", "name": "Section-header", 
"font": null}, {"prov": [{"bbox": [135.70103454589844, 380.07110595703125, 508.6725158691406, 500.4696044921875], "page": 125, "span": [0, 298], "__ref_s3_data": null}], "text": "CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.51354217529297, 330.2432861328125, 360.91705322265625, 346.4449462890625], "page": 125, "span": [0, 36], "__ref_s3_data": null}], "text": "6.9 Triggers and functions (SECURED)", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.73545837402344, 255.84051513671875, 547.2467651367188, 314.6852722167969], "page": 125, "span": [0, 409], "__ref_s3_data": null}], "text": "There are some considerations that must be considered when there are triggers and functions on tables that have RCAC enabled. The purpose of SECURE for triggers and functions is so that a user who is allowed to create a trigger or function is not necessarily able to make it SECURE themselves. This prevents the trigger/function developer from adding code that skims off data that they are not allowed to see.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.3808822631836, 222.538818359375, 151.61126708984375, 235.96595764160156], "page": 125, "span": [0, 14], "__ref_s3_data": null}], "text": "6.9.1 Triggers", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.48204040527344, 164.04632568359375, 547.2885131835938, 210.0581817626953], "page": 125, "span": [0, 318], "__ref_s3_data": null}], "text": "Triggers have access to the data in rows outside of the row permission or column masking. 
An after trigger has access to the new row image after the permission has allowed the update or insert to occur. Therefore, the triggers can potentially change the insert or update image value so that it violates the permission.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.8166809082031, 28.073514938354492, 517.9691772460938, 37.06842803955078], "page": 125, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.3563842773438, 27.93828010559082, 547.2587890625, 37.650997161865234], "page": 125, "span": [0, 3], "__ref_s3_data": null}], "text": "109", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.57964324951172, 27.93828010559082, 83.98200225830078, 37.686649322509766], "page": 126, "span": [0, 3], "__ref_s3_data": null}], "text": "110", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.65092468261719, 27.779094696044922, 339.9294738769531, 37.28006362915039], "page": 126, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.88360595703125, 674.990234375, 547.1917114257812, 721.3570556640625], "page": 126, "span": [0, 362], "__ref_s3_data": null}], "text": "Any triggers that are defined on a table must be created with an attribute that designates that it is SECURED when RCAC definitions are created or altered for that table, as shown in Example 6-6. The same applies to a view that has an instead of trigger. 
That trigger must be secure at the point RCAC is enabled for any of the underlying tables the view is over.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.5281524658203, 653.42236328125, 269.4378967285156, 663.7034301757812], "page": 126, "span": [0, 27], "__ref_s3_data": null}], "text": "Example 6-6 Trigger SECURED", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.4291229248047, 513.5558471679688, 508.6703796386719, 648.5309448242188], "page": 126, "span": [0, 347], "__ref_s3_data": null}], "text": "/* Trigger created with the SECURED attribute */ CREATE TRIGGER PREVENT_MASK_SSN BEFORE INSERT OR UPDATE ON MY_LIB.EMP_INFO REFERENCING NEW ROW AS N OLD ROW AS O FOR EACH ROW MODE DB2ROW SECURED WHEN(SUBSTR(N.COL2_ssn,1,7) = 'XXX-XX-') BEGIN IF INSERTING THEN SET N.COL2_ssn = DEFAULT; ELSEIF UPDATING THEN SET N.COL2_ssn = O.COL2_ssn; END IF; END", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.23265075683594, 476.65728759765625, 166.5321044921875, 489.4510192871094], "page": 126, "span": [0, 15], "__ref_s3_data": null}], "text": "6.9.2 Functions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.7004852294922, 429.2786560058594, 547.2664794921875, 463.3634948730469], "page": 126, "span": [0, 220], "__ref_s3_data": null}], "text": "Within a CREATE PERMISSION or CREATE MASK , a function can be called. 
Because that UDF has access to the data before the RCAC rules are applied, the SECURE attribute is required on that function, as shown in Example 6-7.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.63314819335938, 407.6082763671875, 337.031494140625, 417.7618408203125], "page": 126, "span": [0, 44], "__ref_s3_data": null}], "text": "Example 6-7 Specifying SECURED on a function", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.52565002441406, 270.5191650390625, 446.8673400878906, 403.1766662597656], "page": 126, "span": [0, 230], "__ref_s3_data": null}], "text": "CREATE PERMISSION SCHEMA.PERM1 ON SCHEMA.TABLE1 FOR ROWS WHERE MY_UDF(CURRENT_USER,COLUMN1) = 1 ENFORCED FOR ALL ACCESS ENABLE; CREATE FUNCTION MY_UDF (INP1 CHAR(32), INP2 INTEGER) Returns INTEGER LANGUAGE SQL CONTAINS SQL SECURED", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [135.83522033691406, 204.99681091308594, 547.2584838867188, 251.3785858154297], "page": 126, "span": [0, 337], "__ref_s3_data": null}], "text": "The SECURED attribute of MY_UDF signifies that the function is considered secure for RCAC. If a function is called from an SQL statement, and references a column in a table that has RCAC, it must be declared as secure. In that case, if the secure function calls other functions, they are not validated to confirm whether they are secure.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2251434326172, 183.0125732421875, 282.6751708984375, 193.37486267089844], "page": 126, "span": [0, 32], "__ref_s3_data": null}], "text": "Consider the following examples:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.4008026123047, 108.28175354003906, 547.1887817382812, 176.30027770996094], "page": 126, "span": [0, 334], "__ref_s3_data": null}], "text": "GLYPH Table1 has RCAC defined and enabled. SELECT MY_UDF2(Column2) from schema.table1. 
MY_UDF2 must be created with the SECURED attribute. If MY_UDF2 invokes MY_UDF3, there is no checking to ensure that it is also created with SECURED. NOT SECURED is the default on the create function unless SECURED is explicitly selected.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.5240478515625, 78.86335754394531, 523.39453125, 101.21600341796875], "page": 126, "span": [0, 108], "__ref_s3_data": null}], "text": "This same rule applies for any function that might be invoked with a masked column specified as an argument.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.78050231933594, 682.29833984375, 537.510986328125, 721.4716796875], "page": 127, "span": [0, 206], "__ref_s3_data": null}], "text": "GLYPH Table2 column SSN has a column mask that is defined on it. SELECT MY_UDF4(SSN) from table2. Because SSN has a column mask that is defined, MY_UDF4 must be created with the SECURED attribute.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.45398712158203, 638.97607421875, 387.579833984375, 655.1737060546875], "page": 127, "span": [0, 42], "__ref_s3_data": null}], "text": "6.10 RCAC is only one part of the solution", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.8080291748047, 564.271484375, 547.2545776367188, 622.5728759765625], "page": 127, "span": [0, 379], "__ref_s3_data": null}], "text": "When designing and implementing RCAC row permissions, special attention should be given to the effectiveness and limitations of controlling data access. Data can be housed in objects other than tables or physical files. 
The role and responsibility of the database user, for example, the database engineer, must be reconciled with their respective authority and access privileges.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1082000732422, 529.473388671875, 544.8680419921875, 551.9183349609375], "page": 127, "span": [0, 146], "__ref_s3_data": null}], "text": "Figure 6-25 illustrates that object level security is the first check and that RCAC permissions provide control only on tables and physical files.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.07928466796875, 218.3541717529297, 366.65814208984375, 228.08248901367188], "page": 127, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 6-25 Object-level security and RCAC permissions", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/110"}, {"prov": [{"bbox": [135.93563842773438, 179.97857666015625, 547.2168579101562, 201.9317626953125], "page": 127, "span": [0, 121], "__ref_s3_data": null}], "text": "To get access to the table and the rows, the user must pass the object level authority test and the RCAC permission test.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6477508544922, 133.73806762695312, 547.2177124023438, 167.70111083984375], "page": 127, "span": [0, 232], "__ref_s3_data": null}], "text": "The IBM i journal captures the transactional data and places an image of the row in the journal receiver. 
If the user has access to the journal receiver, the row image can be viewed if the user has authority to the journal receiver.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.03945922851562, 87.78772735595703, 547.24267578125, 122.0500259399414], "page": 127, "span": [0, 222], "__ref_s3_data": null}], "text": "Although the SQL Plan Cache data, the SQL Plan Cache Snapshot data, and the SQL Performance Monitor data do not reveal the results of queries, they can show the literal values that are passed along with the SQL statements.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [370.82598876953125, 28.084239959716797, 517.9691772460938, 37.07847213745117], "page": 127, "span": [0, 36], "__ref_s3_data": null}], "text": "Chapter 6. Additional considerations", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.201904296875, 27.93828010559082, 547.2587890625, 37.48694610595703], "page": 127, "span": [0, 3], "__ref_s3_data": null}], "text": "111", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.64434051513672, 27.93828010559082, 83.98200225830078, 37.513153076171875], "page": 128, "span": [0, 3], "__ref_s3_data": null}], "text": "112", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.78141784667969, 27.795108795166016, 339.9560546875, 37.278629302978516], "page": 128, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.89059448242188, 674.9641723632812, 547.2962646484375, 721.255615234375], "page": 128, "span": [0, 332], "__ref_s3_data": null}], "text": "The ability to monitor, analyze, debug, and tune data-centric applications effectively and efficiently requires some understanding of the underlying data, or at least the attributes of the data. 
The organization must be willing to reconcile the conflicting requirements of \"restricting access to data\", and \"needing access to data\".", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/111"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 129, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 7.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 129, "span": [0, 1], "__ref_s3_data": null}], "text": "7", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 481.3484191894531, 547.2606201171875, 538.65869140625], "page": 129, "span": [0, 40], "__ref_s3_data": null}], "text": "Row and Column Access Control management", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.03298950195312, 409.35833740234375, 530.23193359375, 443.9773254394531], "page": 129, "span": [0, 253], "__ref_s3_data": null}], "text": "After Row and Column Access Control (RCAC) definitions are defined and activated in a database, your management processes must be adjusted to accommodate these new security controls. 
This chapter highlights some of the changes that should be considered.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.26803588867188, 387.5925598144531, 347.4121398925781, 398.0865478515625], "page": 129, "span": [0, 49], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.7023162841797, 370.8629455566406, 356.2835388183594, 381.1903076171875], "page": 129, "span": [0, 57], "__ref_s3_data": null}], "text": "GLYPH Managing row permissions and column masks", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.62911987304688, 358.95751953125, 406.06463623046875, 369.439453125], "page": 129, "span": [0, 69], "__ref_s3_data": null}], "text": "GLYPH Managing tables with row permissions and column masks", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.61477661132812, 346.9558410644531, 323.0509338378906, 357.3504943847656], "page": 129, "span": [0, 54], "__ref_s3_data": null}], "text": "GLYPH Monitoring and auditing function usage", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.98678970336914, 27.78120994567871, 257.24334716796875, 37.33966064453125], "page": 129, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.2393188476562, 27.93828010559082, 547.2587890625, 37.526824951171875], "page": 129, "span": [0, 3], "__ref_s3_data": null}], "text": "113", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.21974182128906, 702.385498046875, 449.7918701171875, 718.162109375], "page": 130, "span": [0, 45], "__ref_s3_data": null}], "text": "7.1 Managing row permissions and column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.92994689941406, 675.7836303710938, 541.12109375, 685.9977416992188], "page": 130, "span": [0, 84], "__ref_s3_data": null}], "text": "This section focuses on the management of the RCAC row permissions and column masks.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.02565002441406, 643.1517333984375, 228.30335998535156, 656.0827026367188], "page": 130, "span": [0, 23], "__ref_s3_data": null}], "text": "7.1.1 Source management", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.09738159179688, 583.5279541015625, 546.4277954101562, 630.0850219726562], "page": 130, "span": [0, 287], "__ref_s3_data": null}], "text": "The SQL statements that are used to define row permissions and column masks should be managed with a change management process. Ideally, you already are using a change management process for your database definitions, and that same process can be extended to cover your RCAC definitions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.02606201171875, 490.18017578125, 547.2933959960938, 571.9515991210938], "page": 130, "span": [0, 596], "__ref_s3_data": null}], "text": "If you are using SQL DDL to define your DB2 tables, then you have the option of adding the RCAC definitions to the same source file as the table definition. 
The benefit of this approach is that it keeps all DDL that is related to a table in a single source file. The downside is that if you must re-create only the RCAC definitions and leave the table unchanged, then you must identify and extract only the RCAC definitions from the source file. There are situations where the row permissions and column masks must be changed or re-created without changing the definition of the associated table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.02523803710938, 457.12799072265625, 231.4643096923828, 470.1705322265625], "page": 130, "span": [0, 27], "__ref_s3_data": null}], "text": "7.1.2 Modifying definitions", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.07144165039062, 397.6028137207031, 547.1988525390625, 443.98638916015625], "page": 130, "span": [0, 334], "__ref_s3_data": null}], "text": "After RCAC is activated for a table, the row permission and column mask definitions can be re-created to change the data access behavior for that table. Usage of the OR REPLACE clause on the CREATE MASK and CREATE PERMISSION SQL statements simplifies the re-creation process by folding in the deletion of the existing RCAC definition.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.90682983398438, 339.7182312011719, 547.2922973632812, 385.9692687988281], "page": 130, "span": [0, 296], "__ref_s3_data": null}], "text": "This capability makes it easy to change your RCAC definitions as you test the controls with your applications and identify tweaks that must be made to your RCAC implementation. 
However, re-creation of RCAC definitions does require an exclusive lock to be acquired on the table during the process.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.17169189453125, 307.0446472167969, 214.5146026611328, 320.0962829589844], "page": 130, "span": [0, 24], "__ref_s3_data": null}], "text": "7.1.3 Turning on and off", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.82620239257812, 235.83880615234375, 547.182861328125, 294.0208435058594], "page": 130, "span": [0, 435], "__ref_s3_data": null}], "text": "As described in 3.1.2, \"Enabling and activating RCAC\" on page 16, the SQL ALTER statement can turn on and off row permissions and column masks. The ALTER MASK and A LTER PERMISSION statements allow an individual row permission or column mask to be turned off with the DISABLE option and back on with the ENABLE option. The ALTER TABLE statement can deactivate enforcement of all the row permissions and column masks for a single table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [142.0942840576172, 171.44253540039062, 541.1311645507812, 218.12530517578125], "page": 130, "span": [0, 300], "__ref_s3_data": null}], "text": "Important: Although these capabilities make it easy to temporarily turn off RCAC security so that you can make environment or application changes, these processes require an exclusive lock to be obtained on a table. 
Therefore, this activity must be planned carefully to avoid disruptions and outages.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.17559051513672, 134.25454711914062, 183.9399871826172, 147.41159057617188], "page": 130, "span": [0, 18], "__ref_s3_data": null}], "text": "7.1.4 Regenerating", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.10853576660156, 99.14614868164062, 547.3272705078125, 121.23512268066406], "page": 130, "span": [0, 172], "__ref_s3_data": null}], "text": "DB2 also can regenerate an existing row permission or column mask. This regenerate option can be useful with more complex RCAC definitions that reference other DB2 objects.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.60557556152344, 27.93828010559082, 83.98200225830078, 37.56919860839844], "page": 130, "span": [0, 3], "__ref_s3_data": null}], "text": "114", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.59931182861328, 27.808855056762695, 339.8987731933594, 37.270851135253906], "page": 130, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.86671447753906, 639.279052734375, 547.2010498046875, 721.4535522460938], "page": 131, "span": [0, 498], "__ref_s3_data": null}], "text": "For example, consider a row permission on an ACCOUNTS table (PERMISSION1_ON_ACCOUNTS). The ACCOUNTS table row permission references and compares columns in the CUSTOMERS table. When the definition of the CUSTOMERS table changes, DB2 does not check to determine whether the change to the CUSTOMERS table breaks the ACCOUNTS table row permission. 
If this table definition change does break the row permission, an error does not surface until an application tries to read rows from the ACCOUNTS table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.08230590820312, 580.8378295898438, 547.1602172851562, 627.391845703125], "page": 131, "span": [0, 357], "__ref_s3_data": null}], "text": "Instead of waiting for an application to detect this error, the REGENERATE option can be used on the ACCOUNTS row permission. The REGENERATE option returns an error if the change in the CUSTOMERS table definition causes the row permission to be invalid. In this way, the row permission can be proactively corrected before an application discovers the error.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.27880096435547, 537.6162719726562, 536.6239013671875, 553.5975341796875], "page": 131, "span": [0, 57], "__ref_s3_data": null}], "text": "7.2 Managing tables with row permissions and column masks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0673370361328, 499.2984619140625, 547.2647094726562, 521.5249633789062], "page": 131, "span": [0, 94], "__ref_s3_data": null}], "text": "This section examines the object management considerations after RCAC is added to a DB2 table.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.25625610351562, 466.6372985839844, 205.3369598388672, 479.3948059082031], "page": 131, "span": [0, 22], "__ref_s3_data": null}], "text": "7.2.1 Save and restore", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.18902587890625, 407.25885009765625, 547.1621704101562, 453.1397399902344], "page": 131, "span": [0, 261], "__ref_s3_data": null}], "text": "Row permissions and column masks are stored in the DB2 table object itself, so they are automatically saved and restored when the DB2 table object is saved and restored. 
Therefore, no adjustments must be made to your database backup process to accommodate RCAC.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.97389221191406, 360.8598937988281, 547.2257080078125, 395.4291687011719], "page": 131, "span": [0, 268], "__ref_s3_data": null}], "text": "Save and restore processing works fine with RCAC if the RCAC definition does not reference other DB2 objects other than the table over which they are defined. When the RCAC definition has dependencies on other DB2 objects, the restore process is much more challenging.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [283.8978271484375, 27.887859344482422, 518.0120849609375, 37.17605972290039], "page": 131, "span": [0, 51], "__ref_s3_data": null}], "text": "Chapter 7. Row and Column Access Control management", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.184326171875, 27.93828010559082, 547.2587890625, 37.493892669677734], "page": 131, "span": [0, 3], "__ref_s3_data": null}], "text": "115", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.5724105834961, 27.93828010559082, 83.98200225830078, 37.497066497802734], "page": 132, "span": [0, 3], "__ref_s3_data": null}], "text": "116", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.69707489013672, 27.771995544433594, 340.0185546875, 37.28822326660156], "page": 132, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.94317626953125, 591.0215454101562, 546.4418334960938, 721.4165649414062], "page": 132, "span": [0, 872], "__ref_s3_data": null}], "text": "For example, assume that the BANKSCHEMA library (which is the system name or short name for the schema long name of BANK_SCHEMA) is saved and restored into a library named BANK_TEST. 
Recall from the example in 7.1.4, \"Regenerating\" on page 114 that the row permission on the ACCOUNTS table references the CUSTOMERS table (\u2026 SELECT C.CUSTOMER_ID FROM CUSTOMERS C \u2026). After the restore operation, the ACCOUNTS row permission still references the CUSTOMERS table in BANK_SCHEMA because DB2 explicitly qualifies all object references when the row permission or column mask is created. The restore processing does not change the explicit qualification from BANK_SCHEMA to BANK_TEST. As a result, the restored ACCOUNTS row permission now depends on DB2 objects residing in a different schema, even though it was not created that way originally. For more details, see Figure 7-1.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8966064453125, 325.75177001953125, 333.6893005371094, 335.1720886230469], "page": 132, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 7-1 Restoring tables to different schemas", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/112"}, {"prov": [{"bbox": [135.9806365966797, 239.0277557373047, 547.2655639648438, 309.4206237792969], "page": 132, "span": [0, 477], "__ref_s3_data": null}], "text": "The only way to fix this issue is to re-create the row permission or column mask after the restore operation. Re-creation of the row permission or column mask is required only for definitions that reference other DB2 objects, but it is simpler to re-create all of the RCAC definitions instead of a subset. 
For example, generate the SQL using System i Navigator, clear the \"Schema qualify names for objects\" and select the \"OR REPLACE clause\", and then run the generated script.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.11895751953125, 206.32223510742188, 196.41009521484375, 219.43421936035156], "page": 132, "span": [0, 21], "__ref_s3_data": null}], "text": "7.2.2 Table migration", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.7648468017578, 146.9532012939453, 538.5225830078125, 193.4559783935547], "page": 132, "span": [0, 329], "__ref_s3_data": null}], "text": "There are several IBM i CL commands, such as Move Object ( MOVOBJ ), Create Duplicate Object ( CRTDUPOBJ ), and Copy Library ( CPYLIB ), which are used to migrate a table from one library to another one. Often, this migration is done to create different versions of the table that can be used for development or testing purposes.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.84234619140625, 100.9890365600586, 542.6978149414062, 135.3026123046875], "page": 132, "span": [0, 255], "__ref_s3_data": null}], "text": "The migration of a table with RCAC has the same challenges as restore processing. 
If the RCAC definition references other DB2 objects, then IBM i CL commands do not change the schema names that are explicitly qualified by the DB2 internal RCAC processing.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9188232421875, 67.12712097167969, 524.2598876953125, 89.22772216796875], "page": 132, "span": [0, 132], "__ref_s3_data": null}], "text": "Again, re-creating the row permission or column mask is the only way to fix the issue of references to DB2 objects in other schemas.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.26126861572266, 705.5285034179688, 396.1838684082031, 721.473876953125], "page": 133, "span": [0, 42], "__ref_s3_data": null}], "text": "7.3 Monitoring and auditing function usage", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9081268310547, 619.106201171875, 546.3292236328125, 689.3731689453125], "page": 133, "span": [0, 526], "__ref_s3_data": null}], "text": "While establishing proper roles for users, separating duties using function usage IDs, and defining RCAC policies allows you to implement an effective and pervasive data access control scheme. How do you monitor and audit everyone who is involved in the implementation of that scheme? The answer is to use IBM i journaling. 
A special journal that is called QAUDJRN, also known as the audit journal , can provide a record and audit trail of many security relevant events that occur on the system, including RCAC-related events.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.88485717773438, 536.9307250976562, 547.2933349609375, 607.4329833984375], "page": 133, "span": [0, 502], "__ref_s3_data": null}], "text": "The tasks and operations of security administrators and database engineers who are collaborating can (and should) be effectively monitored and audited to ensure that the organization's data access control and governance policies are in place and enabled. For example, the Database Engineers can be involved in designing and developing functions and triggers that must be secured using the SECURE attribute. Otherwise, without properly securing functions and triggers, the RCAC controls can be bypassed.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.82188415527344, 491.26055908203125, 546.2147216796875, 525.3898315429688], "page": 133, "span": [0, 193], "__ref_s3_data": null}], "text": "A new journal entry type of \"AX\" for journal entry code \"T\" (audit trail) is now used for RCAC. 
More information about the journaling of RCAC operations can be found in the following documents:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.63038635253906, 474.28076171875, 396.8944396972656, 484.52532958984375], "page": 133, "span": [0, 70], "__ref_s3_data": null}], "text": "GLYPH IBM i Version 7.2 Journal Management Guide , found at:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.8509979248047, 445.02569580078125, 545.9945678710938, 467.0314025878906], "page": 133, "span": [0, 92], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis .htm?lang=en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.46853637695312, 427.678466796875, 387.5917663574219, 437.95831298828125], "page": 133, "span": [0, 70], "__ref_s3_data": null}], "text": "GLYPH IBM i Version 7.2 Security Reference Guide , found at:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.4048309326172, 399.1193542480469, 546.0806274414062, 421.1822204589844], "page": 133, "span": [0, 90], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.h tm?lang=en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [284.0103454589844, 27.87947654724121, 518.0120849609375, 37.15098190307617], "page": 133, "span": [0, 51], "__ref_s3_data": null}], "text": "Chapter 7. 
Row and Column Access Control management", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.2394409179688, 27.93828010559082, 547.2671508789062, 37.50862121582031], "page": 133, "span": [0, 3], "__ref_s3_data": null}], "text": "117", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.73052215576172, 27.93828010559082, 83.98200225830078, 37.495025634765625], "page": 134, "span": [0, 3], "__ref_s3_data": null}], "text": "118", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.7788314819336, 27.781814575195312, 340.0904846191406, 37.32704544067383], "page": 134, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/113"}, {"prov": [{"bbox": [81.0, 517.019287109375, 115.13253021240234, 523.457275390625], "page": 135, "span": [0, 10], "__ref_s3_data": null}], "text": "Chapter 8.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 482.1217956542969, 479.93341064453125, 538.7562866210938], "page": 135, "span": [0, 34], "__ref_s3_data": null}], "text": "Designing and planning for success", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.87625122070312, 397.4395751953125, 538.4698486328125, 444.0196228027344], "page": 135, "span": [0, 285], "__ref_s3_data": null}], "text": "Although successfully implementing Row and Column Access Control (RCAC) is based on knowledge and skills, designing and planning are fundamental aspects. 
This chapter describes the need for a deep understanding of the technology, and good design, proper planning, and adequate testing.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.03575134277344, 375.6953125, 347.4120788574219, 386.0404052734375], "page": 135, "span": [0, 49], "__ref_s3_data": null}], "text": "The following topics are covered in this chapter:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.7108612060547, 358.8554992675781, 411.53955078125, 369.3179626464844], "page": 135, "span": [0, 70], "__ref_s3_data": null}], "text": "GLYPH Implementing RCAC with good design and proper planning", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.673095703125, 347.0796813964844, 284.81951904296875, 357.7647705078125], "page": 135, "span": [0, 46], "__ref_s3_data": null}], "text": "GLYPH DB2 for i Center of Excellence", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [63.9481315612793, 27.7833251953125, 257.24334716796875, 37.34917449951172], "page": 135, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.1824951171875, 27.93828010559082, 547.2587890625, 37.699867248535156], "page": 135, "span": [0, 3], "__ref_s3_data": null}], "text": "119", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [500.3999938964844, 661.8682861328125, 522.6177368164062, 698.831298828125], "page": 135, "span": [0, 1], "__ref_s3_data": null}], "text": "8", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.43937683105469, 702.308837890625, 544.5512084960938, 718.29345703125], "page": 136, "span": [0, 58], "__ref_s3_data": null}], "text": "8.1 Implementing RCAC with good design and proper planning", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.76779174804688, 652.1786499023438, 544.7098999023438, 685.9956665039062], "page": 136, "span": [0, 197], "__ref_s3_data": null}], "text": "By using RCAC, the row and column data that is returned to the requester can be controlled and governed by a set of data-centric policies that are defined with SQL and implemented within DB2 for i.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.20762634277344, 593.9513549804688, 545.359375, 640.1348876953125], "page": 136, "span": [0, 318], "__ref_s3_data": null}], "text": "RCAC provides fine-grained access control and is complementary to IBM i object-level security. 
With the new RCAC feature of DB2 for i, the database engineer, in partnership with the data owner and security officer, can ensure that users have access to the data based on their level of authorization and responsibility.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89334106445312, 524.1407470703125, 547.2506103515625, 581.9553833007812], "page": 136, "span": [0, 365], "__ref_s3_data": null}], "text": "This situation also can include separation of duties, such as allowing the application developers to design and implement the solutions, but restricting them from accessing the production data based on policy. Just because someone writes and owns the program, it does not mean that they have access to all the sensitive data that their program can potentially read.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.08802795410156, 501.609619140625, 500.5572509765625, 512.1461791992188], "page": 136, "span": [0, 78], "__ref_s3_data": null}], "text": "This paper has described the following pervasive power and advantages of RCAC:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.58950805664062, 484.95391845703125, 429.4506530761719, 495.30487060546875], "page": 136, "span": [0, 79], "__ref_s3_data": null}], "text": "GLYPH Access can be controlled through simple or sophisticated logic.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6150665283203, 473.0511474609375, 351.649169921875, 483.7167053222656], "page": 136, "span": [0, 62], "__ref_s3_data": null}], "text": "GLYPH Virtually no application changes are required.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.59262084960938, 460.7434387207031, 491.7782287597656, 471.5057067871094], "page": 136, "span": [0, 93], "__ref_s3_data": null}], "text": "GLYPH The implementation of the access policy is part of the DB2 data access layer.", "type": "paragraph", "name": 
"List-item", "font": null}, {"prov": [{"bbox": [135.65518188476562, 448.6240234375, 426.24566650390625, 459.2989196777344], "page": 136, "span": [0, 81], "__ref_s3_data": null}], "text": "GLYPH Table data is protected regardless of the interface that is used.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.66162109375, 436.8657531738281, 433.2464599609375, 447.2756652832031], "page": 136, "span": [0, 80], "__ref_s3_data": null}], "text": "GLYPH No user is inherently exempted from the access control policies.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.51698303222656, 424.99517822265625, 383.5718688964844, 435.4136657714844], "page": 136, "span": [0, 67], "__ref_s3_data": null}], "text": "GLYPH Groups of users can share policies and permissions.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.77777099609375, 330.777099609375, 547.245361328125, 413.263427734375], "page": 136, "span": [0, 623], "__ref_s3_data": null}], "text": "A deep understanding of the technology, and proper planning, good design, adequate testing, and monitored deployment are critical for success. This includes the usage of quality assurance testing, and realistic performance and scalability exercises that serve to demonstrate that all of your requirements are being met. As part of the verification process, the usage of in-depth proofs of concepts and proofs of technology are recommended, if not essential. When RCAC is activated, the results of queries can change. 
Anticipating this change and realizing the effects of RCAC before going live are of the utmost importance.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.85911560058594, 260.817626953125, 547.19580078125, 319.1590881347656], "page": 136, "span": [0, 380], "__ref_s3_data": null}], "text": "With the ever-growing value of data, and the vast and varied database technology that is available today, it is crucial to have a person or persons on staff who specialize in data-centric design, development, and deployment. This role and responsibility falls on the database engineer. With the availability of DB2 RCAC, the importance of full-time database engineering has grown.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.4874267578125, 217.91627502441406, 324.01275634765625, 233.66151428222656], "page": 136, "span": [0, 34], "__ref_s3_data": null}], "text": "8.2 DB2 for i Center of Excellence", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.7983856201172, 142.796875, 533.2318115234375, 201.5377197265625], "page": 136, "span": [0, 354], "__ref_s3_data": null}], "text": "To further assist you with understanding and implementing RCAC, the DB2 for i Center of Excellence team offers an RCAC education and consulting workshop. 
In addition to knowledge transfer, a working session allows for a review of your data access control requirements, review of the current environment, solution ideation, and high-level solution design.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8557586669922, 109.11979675292969, 547.2406005859375, 131.31752014160156], "page": 136, "span": [0, 114], "__ref_s3_data": null}], "text": "If you are interested in engaging with the DB2 for i Center of Excellence, contact Mike Cain at mcain@us.ibm.com .", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.63890075683594, 27.93828010559082, 83.98200225830078, 37.58589172363281], "page": 136, "span": [0, 3], "__ref_s3_data": null}], "text": "120", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.74649047851562, 27.775848388671875, 339.86614990234375, 37.33769607543945], "page": 136, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/114"}, {"prov": [{"bbox": [74.4000015258789, 517.019287109375, 115.15289306640625, 523.457275390625], "page": 137, "span": [0, 11], "__ref_s3_data": null}], "text": "Appendix A.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/115"}, {"prov": [{"bbox": [136.8000030517578, 481.3309326171875, 485.7971496582031, 538.6655883789062], "page": 137, "span": [0, 49], "__ref_s3_data": null}], "text": "Database definitions for the RCAC banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.57388305664062, 397.5041198730469, 539.8473510742188, 444.1872253417969], "page": 137, "span": [0, 322], "__ref_s3_data": null}], "text": "This appendix provides the database definitions or DDLs to re-create the Row and Column Access Control 
(RCAC) scenario that is described in Chapter 4, \"Implementing Row and Column Access Control: Banking example\" on page 37. The script that is shown in Example A-1 is the DDL script that is used to implement this example.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.50365447998047, 376.32855224609375, 333.3837585449219, 386.06781005859375], "page": 137, "span": [0, 60], "__ref_s3_data": null}], "text": "Example A-1 DDL script to implement the RCAC banking example", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [63.48943328857422, 61.49855422973633, 541.1366577148438, 370.9224853515625], "page": 137, "span": [0, 1229], "__ref_s3_data": null}], "text": "/* Database Definitions for RCAC Bank Scenario */ /* Schema */ CREATE SCHEMA BANK_SCHEMA FOR SCHEMA BANKSCHEMA ; /* Global Variable */ CREATE VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID VARCHAR( 30) ; LABEL ON VARIABLE BANK_SCHEMA.CUSTOMER_LOGIN_ID IS 'Customer''s log in value passed by web application' ; /* Tables */ CREATE TABLE BANK_SCHEMA.CUSTOMERS ( CUSTOMER_ID FOR COLUMN CUSTO00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_NAME FOR COLUMN CUSTO00002 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_ADDRESS FOR COLUMN CUSTO00003 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_CITY FOR COLUMN CUSTO00004 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_STATE FOR COLUMN CUSTO00005 CHAR(2) CCSID 37 NOT NULL , CUSTOMER_PHONE FOR COLUMN CUSTO00006 CHAR(10) CCSID 37 NOT NULL , CUSTOMER_EMAIL FOR COLUMN CUSTO00007 VARCHAR(30) CCSID 37 NOT NULL , CUSTOMER_TAX_ID FOR COLUMN CUSTO00008 CHAR(11) CCSID 37 NOT NULL , CUSTOMER_DRIVERS_LICENSE_NUMBER FOR COLUMN CUSTO00012 CHAR(13) CCSID 37 DEFAULT NULL , CUSTOMER_LOGIN_ID FOR COLUMN CUSTO00009 VARCHAR(30) CCSID 37 DEFAULT NULL , CUSTOMER_SECURITY_QUESTION FOR COLUMN CUSTO00010 VARCHAR(100) CCSID 37 DEFAULT NULL ,", "type": "paragraph", "name": 
"Code", "font": null}, {"prov": [{"bbox": [63.879676818847656, 27.74163246154785, 257.24334716796875, 37.323570251464844], "page": 137, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.1998291015625, 27.93828010559082, 547.2587890625, 37.750850677490234], "page": 137, "span": [0, 3], "__ref_s3_data": null}], "text": "121", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [62.94416046142578, 73.59288024902344, 546.5369873046875, 721.5032348632812], "page": 138, "span": [0, 2754], "__ref_s3_data": null}], "text": "CUSTOMER_SECURITY_QUESTION_ANSWER FOR COLUMN CUSTO00011 VARCHAR(100) CCSID 37 DEFAULT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.CUSTOMER_ID_PK PRIMARY KEY( CUSTOMER_ID ) ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_UK UNIQUE( CUSTOMER_LOGIN_ID ) ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_DRIVERS_LICENSE_CHECK CHECK( CUSTOMER_DRIVERS_LICENSE_NUMBER <> '*************' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_DRIVERS_LICENSE_NUMBER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_EMAIL_CHECK CHECK( CUSTOMER_EMAIL <> '****@****' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_EMAIL ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_LOGIN_ID_CHECK CHECK( CUSTOMER_LOGIN_ID <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_LOGIN_ID = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_LOGIN_ID ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_CHECK CHECK( CUSTOMER_SECURITY_QUESTION_ANSWER <> '*****' ) ON INSERT VIOLATION SET 
CUSTOMER_SECURITY_QUESTION_ANSWER = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION_ANSWER ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_SECURITY_QUESTION_ANSWER CHECK( CUSTOMER_SECURITY_QUESTION <> '*****' ) ON INSERT VIOLATION SET CUSTOMER_SECURITY_QUESTION = DEFAULT ON UPDATE VIOLATION PRESERVE CUSTOMER_SECURITY_QUESTION ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ADD CONSTRAINT BANK_SCHEMA.CUSTOMER_TAX_ID_CHECK CHECK( CUSTOMER_TAX_ID <> 'XXX-XX-XXXX' AND SUBSTR ( CUSTOMER_TAX_ID , 1 , 7 ) <> 'XXX-XX-' ) ON UPDATE VIOLATION PRESERVE CUSTOMER_TAX_ID ; CREATE TABLE BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), CUSTOMER_ID FOR COLUMN CUSTID INTEGER NOT NULL , ACCOUNT_NUMBER FOR COLUMN ACCOUNTNO VARCHAR(50) CCSID 37 NOT NULL , ACCOUNT_NAME FOR COLUMN ACCOUNTNAM CHAR(12) CCSID 37 NOT NULL , ACCOUNT_DATE_OPENED FOR COLUMN OPENDATE DATE DEFAULT CURRENT_DATE , ACCOUNT_DATE_CLOSED FOR COLUMN CLOSEDATE DATE DEFAULT NULL , ACCOUNT_CURRENT_BALANCE FOR COLUMN ACCTBAL DECIMAL(11, 2) NOT NULL DEFAULT 0 , INSERT_TIMESTAMP FOR COLUMN INSDATE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDDATE TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.ACCOUNT_ID_PK PRIMARY KEY( ACCOUNT_ID ) );", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.5491943359375, 27.93828010559082, 83.98200225830078, 37.62962341308594], "page": 138, "span": [0, 3], "__ref_s3_data": null}], "text": "122", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.43730926513672, 27.892166137695312, 340.0867614746094, 37.298946380615234], "page": 138, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": 
"Page-footer", "font": null}, {"prov": [{"bbox": [62.803646087646484, 62.641639709472656, 546.5366821289062, 721.3413696289062], "page": 139, "span": [0, 2313], "__ref_s3_data": null}], "text": "ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_CUSTOMER_ID_FK FOREIGN KEY( CUSTOMER_ID ) REFERENCES BANK_SCHEMA.CUSTOMERS ( CUSTO00001 ) ON DELETE RESTRICT ON UPDATE RESTRICT ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ADD CONSTRAINT BANK_SCHEMA.ACCOUNT_NUMBER_CHECK CHECK( ACCOUNT_NUMBER <> '*****' ) ON UPDATE VIOLATION PRESERVE ACCOUNT_NUMBER ; CREATE TABLE BANK_SCHEMA.TRANSACTIONS FOR SYSTEM NAME TRANS ( TRANSACTION_ID FOR COLUMN TRANS00001 INTEGER GENERATED ALWAYS AS IDENTITY ( START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE NO CYCLE NO ORDER CACHE 20 ), ACCOUNT_ID INTEGER NOT NULL , TRANSACTION_TYPE FOR COLUMN TRANS00002 CHAR(1) CCSID 37 NOT NULL , TRANSACTION_DATE FOR COLUMN TRANS00003 DATE NOT NULL DEFAULT CURRENT_DATE , TRANSACTION_TIME FOR COLUMN TRANS00004 TIME NOT NULL DEFAULT CURRENT_TIME , TRANSACTION_AMOUNT FOR COLUMN TRANS00005 DECIMAL(11, 2) NOT NULL , INSERT_TIMESTAMP FOR COLUMN INSER00001 TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP IMPLICITLY HIDDEN , UPDATE_TIMESTAMP FOR COLUMN UPDAT00001 TIMESTAMP GENERATED ALWAYS FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP NOT NULL IMPLICITLY HIDDEN , CONSTRAINT BANK_SCHEMA.TRANSACTION_ID_PK PRIMARY KEY( TRANSACTION_ID ) ) ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ADD CONSTRAINT BANK_SCHEMA.TRANSACTIONS_ACCOUNT_ID_FK FOREIGN KEY( ACCOUNT_ID ) REFERENCES BANK_SCHEMA.ACCOUNTS ( ACCOUNT_ID ) ON DELETE RESTRICT ON UPDATE RESTRICT ; /* Permissions and Masks */ CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR ROWS WHERE ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . 
CUSTOMER_LOGIN_ID ) ) ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_EMAIL_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_EMAIL RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_EMAIL WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_EMAIL ELSE '****@****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_TAX_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_TAX_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [256.8025207519531, 27.89881134033203, 517.9058227539062, 37.0754508972168], "page": 139, "span": [0, 61], "__ref_s3_data": null}], "text": "Appendix A. Database definitions for the RCAC banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.2589111328125, 27.93828010559082, 547.2587890625, 37.56678009033203], "page": 139, "span": [0, 3], "__ref_s3_data": null}], "text": "123", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.7410888671875, 85.18326568603516, 500.697265625, 720.6396484375], "page": 140, "span": [0, 1998], "__ref_s3_data": null}], "text": "THEN C . CUSTOMER_TAX_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN ( 'XXX-XX-' CONCAT QSYS2 . SUBSTR ( C . CUSTOMER_TAX_ID , 8 , 4 ) ) WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_TAX_ID ELSE 'XXX-XX-XXXX' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_DRIVERS_LICENSE_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_DRIVERS_LICENSE_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER WHEN QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_DRIVERS_LICENSE_NUMBER ELSE '*************' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_LOGIN_ID_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_LOGIN_ID RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_LOGIN_ID WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_LOGIN_ID ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION ELSE '*****' END ENABLE ; CREATE MASK BANK_SCHEMA.MASK_SECURITY_QUESTION_ANSWER_ON_CUSTOMERS ON BANK_SCHEMA.CUSTOMERS AS C FOR COLUMN CUSTOMER_SECURITY_QUESTION_ANSWER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN C . CUSTOMER_SECURITY_QUESTION_ANSWER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN C . 
CUSTOMER_SECURITY_QUESTION_ANSWER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.CUSTOMERS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ;", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [64.56851196289062, 27.93828010559082, 83.98200225830078, 37.54389572143555], "page": 140, "span": [0, 3], "__ref_s3_data": null}], "text": "124", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.52511596679688, 27.84050178527832, 339.9233093261719, 37.32281494140625], "page": 140, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.56539535522461, 192.05337524414062, 530.8372802734375, 721.4015502929688], "page": 141, "span": [0, 1533], "__ref_s3_data": null}], "text": "CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR ROWS WHERE ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; CREATE MASK BANK_SCHEMA.MASK_ACCOUNT_NUMBER_ON_ACCOUNTS ON BANK_SCHEMA.ACCOUNTS AS A FOR COLUMN ACCOUNT_NUMBER RETURN CASE WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'ADMIN' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'TELLER' ) = 1 THEN A . ACCOUNT_NUMBER WHEN QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 THEN A . ACCOUNT_NUMBER ELSE '*****' END ENABLE ; ALTER TABLE BANK_SCHEMA.ACCOUNTS ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL ; CREATE PERMISSION BANK_SCHEMA.PERMISSION1_ON_TRANSACTIONS ON BANK_SCHEMA.TRANSACTIONS AS T FOR ROWS WHERE ( QSYS2 . 
VERIFY_GROUP_FOR_USER ( SESSION_USER , 'DBE' , 'ADMIN' , 'TELLER' ) = 1 ) OR ( QSYS2 . VERIFY_GROUP_FOR_USER ( SESSION_USER , 'CUSTOMER' ) = 1 AND ( T . ACCOUNT_ID IN ( SELECT A . ACCOUNT_ID FROM BANK_SCHEMA . ACCOUNTS A WHERE A . CUSTOMER_ID IN ( SELECT C . CUSTOMER_ID FROM BANK_SCHEMA . CUSTOMERS C WHERE C . CUSTOMER_LOGIN_ID = BANK_SCHEMA . CUSTOMER_LOGIN_ID ENFORCED FOR ALL ACCESS ENABLE ; ALTER TABLE BANK_SCHEMA.TRANSACTIONS ACTIVATE ROW ACCESS CONTROL ; /* END */", "type": "paragraph", "name": "Code", "font": null}, {"prov": [{"bbox": [256.91644287109375, 27.90251350402832, 517.9058227539062, 37.15294647216797], "page": 141, "span": [0, 61], "__ref_s3_data": null}], "text": "Appendix A. Database definitions for the RCAC banking example", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.2833251953125, 27.93828010559082, 547.2587890625, 37.5584716796875], "page": 141, "span": [0, 3], "__ref_s3_data": null}], "text": "125", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.75247192382812, 27.93828010559082, 83.98200225830078, 37.44638442993164], "page": 142, "span": [0, 3], "__ref_s3_data": null}], "text": "126", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.78695678710938, 27.779624938964844, 340.1011047363281, 37.32666778564453], "page": 142, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.80000305175781, 695.1260375976562, 299.2008056640625, 718.6505126953125], "page": 143, "span": [0, 20], "__ref_s3_data": null}], "text": "Related publications", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.890625, 637.7537231445312, 530.0675048828125, 660.0474853515625], "page": 143, "span": [0, 150], "__ref_s3_data": null}], "text": "The publications that are listed in this 
section are considered suitable for a more detailed description of the topics that are covered in this paper.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.57539367675781, 594.4069213867188, 205.97418212890625, 610.588623046875], "page": 143, "span": [0, 18], "__ref_s3_data": null}], "text": "Other publications", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9392547607422, 567.9268188476562, 413.18115234375, 578.0560913085938], "page": 143, "span": [0, 63], "__ref_s3_data": null}], "text": "These publications are relevant as further information sources:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5774383544922, 550.9959716796875, 414.05657958984375, 561.1710815429688], "page": 143, "span": [0, 74], "__ref_s3_data": null}], "text": "GLYPH IBM DB2 for i indexing methods and strategies white paper:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.6581268310547, 521.37451171875, 545.9945678710938, 543.9347534179688], "page": 143, "span": [0, 108], "__ref_s3_data": null}], "text": "http://www.ibm.com/partnerworld/wps/servlet/ContentHandler/stg_ast_sys_wp_db2_i _indexing_methods_strategies", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6566619873047, 505.1793212890625, 299.7695007324219, 515.2197875976562], "page": 143, "span": [0, 49], "__ref_s3_data": null}], "text": "GLYPH IBM i Memo to Users Version 7.2 :", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.8552703857422, 487.74090576171875, 536.167236328125, 497.69171142578125], "page": 143, "span": [0, 77], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgmtu.htm", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4134979248047, 471.1599426269531, 371.4087829589844, 481.3167724609375], "page": 143, "span": [0, 65], 
"__ref_s3_data": null}], "text": "GLYPH IBM i Version 7.2 DB2 for i SQL Reference Guide :", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.59320068359375, 442.22589111328125, 545.9945678710938, 464.06427001953125], "page": 143, "span": [0, 86], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/db2/rbafzintro.htm?l ang=en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6075439453125, 424.78717041015625, 355.629150390625, 435.43304443359375], "page": 143, "span": [0, 60], "__ref_s3_data": null}], "text": "GLYPH IBM i Version 7.2 Journal Management Guide :", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.7898712158203, 396.0626525878906, 545.9945678710938, 417.7483215332031], "page": 143, "span": [0, 92], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzaki/rzakiprintthis .htm?lang=en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.41650390625, 378.9584045410156, 346.3712158203125, 389.6202087402344], "page": 143, "span": [0, 60], "__ref_s3_data": null}], "text": "GLYPH IBM i Version 7.2 Security Reference Guide :", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.42144775390625, 349.67706298828125, 545.9945678710938, 372.0034484863281], "page": 143, "span": [0, 90], "__ref_s3_data": null}], "text": "http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzarl/rzarlkickoff.h tm?lang=en", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.59217834472656, 306.89630126953125, 195.1357421875, 322.5559997558594], "page": 143, "span": [0, 16], "__ref_s3_data": null}], "text": "Online resources", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.91726684570312, 280.17840576171875, 399.3615417480469, 290.130126953125], "page": 143, "span": [0, 59], 
"__ref_s3_data": null}], "text": "These websites are relevant as further information sources:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.32025146484375, 234.16583251953125, 545.9945678710938, 273.6405944824219], "page": 143, "span": [0, 168], "__ref_s3_data": null}], "text": "GLYPH Database programming topic of the IBM i 7.2 IBM Knowledge Center: http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_72/rzahg/rzahgdbp.htm?l ang=en", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.65589904785156, 216.89540100097656, 287.6542053222656, 227.5020294189453], "page": 143, "span": [0, 46], "__ref_s3_data": null}], "text": "GLYPH Identity Theft Resource Center", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.2787628173828, 199.96951293945312, 291.11822509765625, 209.83056640625], "page": 143, "span": [0, 28], "__ref_s3_data": null}], "text": "http://www.idtheftcenter.org", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.48793029785156, 183.16000366210938, 231.24366760253906, 193.20037841796875], "page": 143, "span": [0, 33], "__ref_s3_data": null}], "text": "GLYPH Ponemon Institute", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.60269165039062, 165.8961181640625, 266.09869384765625, 176.1220703125], "page": 143, "span": [0, 23], "__ref_s3_data": null}], "text": "http://www.ponemon.org/", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.9522819519043, 27.784912109375, 257.24334716796875, 37.28429412841797], "page": 143, "span": [0, 48], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2014. 
All rights reserved.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [530.2672119140625, 27.93828010559082, 547.2587890625, 37.63324737548828], "page": 143, "span": [0, 3], "__ref_s3_data": null}], "text": "127", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.7861328125, 705.8480834960938, 172.86196899414062, 721.7124633789062], "page": 144, "span": [0, 13], "__ref_s3_data": null}], "text": "Help from IBM", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.20310974121094, 623.4984130859375, 262.6285400390625, 689.3955078125], "page": 144, "span": [0, 80], "__ref_s3_data": null}], "text": "IBM Support and downloads ibm.com /support IBM Global Services ibm.com /services", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.65906524658203, 27.93828010559082, 83.98200225830078, 37.34457015991211], "page": 144, "span": [0, 3], "__ref_s3_data": null}], "text": "128", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [98.72059631347656, 27.806060791015625, 339.8825988769531, 37.2719841003418], "page": 144, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [287.2200012207031, 741.251953125, 414.24481201171875, 763.4519653320312], "page": 146, "span": [0, 10], "__ref_s3_data": null}], "text": "Back cover", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [27.0, 651.225830078125, 447.3600158691406, 719.8479614257812], "page": 146, "span": [0, 54], "__ref_s3_data": null}], "text": "Row and Column Access Control Support in IBM DB2 for i", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [26.516427993774414, 524.8208618164062, 127.443603515625, 550.765380859375], "page": 146, "span": [0, 40], "__ref_s3_data": null}], 
"text": "Implement roles and separation of duties", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [26.451623916625977, 469.1280212402344, 120.283203125, 508.38104248046875], "page": 146, "span": [0, 40], "__ref_s3_data": null}], "text": "Leverage row permissions on the database", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [26.34380340576172, 413.14801025390625, 121.44960021972656, 452.6860656738281], "page": 146, "span": [0, 40], "__ref_s3_data": null}], "text": "Protect columns by defining column masks", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [152.0505828857422, 468.4081115722656, 414.084228515625, 550.2308959960938], "page": 146, "span": [0, 464], "__ref_s3_data": null}], "text": "This IBM Redpaper publication provides information about the IBM i 7.2 feature of IBM DB2 for i Row and Column Access Control (RCAC). It offers a broad description of the function and advantages of controlling access to data in a comprehensive and transparent way. This publication helps you understand the capabilities of RCAC and provides examples of defining, creating, and implementing the row permissions and column masks in a relational database environment.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [152.21202087402344, 403.1996765136719, 414.173828125, 461.3445129394531], "page": 146, "span": [0, 309], "__ref_s3_data": null}], "text": "This paper is intended for database engineers, data-centric application developers, and security officers who want to design and implement RCAC as a part of their data control and governance policy. 
A solid background in IBM i object level security, DB2 for i relational database concepts, and SQL is assumed.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [170.9120635986328, 152.3369903564453, 232.1637420654297, 161.34877014160156], "page": 146, "span": [0, 12], "__ref_s3_data": null}], "text": "REDP-5110-00", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/116"}, {"name": "Picture", "type": "figure", "$ref": "#/figures/117"}, {"prov": [{"bbox": [466.37554931640625, 489.8393859863281, 559.809326171875, 544.2816772460938], "page": 146, "span": [0, 44], "__ref_s3_data": null}], "text": "INTERNATIONAL TECHNICAL SUPPORT ORGANIZATION", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [466.6063537597656, 405.52801513671875, 587.38916015625, 440.42242431640625], "page": 146, "span": [0, 60], "__ref_s3_data": null}], "text": "BUILDING TECHNICAL INFORMATION BASED ON PRACTICAL EXPERIENCE", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [466.4356689453125, 250.36593627929688, 587.5205078125, 392.952880859375], "page": 146, "span": [0, 323], "__ref_s3_data": null}], "text": "IBM Redbooks are developed by the IBM International Technical Support Organization. Experts from IBM, Customers and Partners from around the world create timely technical information based on realistic scenarios. 
Specific recommendations are provided to help you implement IT solutions more effectively in your environment.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [467.00830078125, 190.48809814453125, 570.947998046875, 214.1653289794922], "page": 146, "span": [0, 39], "__ref_s3_data": null}], "text": "For more information: ibm.com /redbooks", "type": "paragraph", "name": "Text", "font": null}], "figures": [{"prov": [{"bbox": [513.4270629882812, 737.29345703125, 586.1854248046875, 765.7447509765625], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [13.370661735534668, 87.77820587158203, 583.5319213867188, 508.59100341796875], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [314.70001220703125, 17.213214874267578, 581.3467407226562, 82.39669799804688], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [79.84638214111328, 696.4725341796875, 142.80374145507812, 720.9906005859375], "page": 3, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.10924530029297, 102.9841079711914, 258.7627258300781, 188.21194458007812], "page": 11, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [309.1800537109375, 608.730224609375, 371.9783020019531, 634.2922973632812], "page": 12, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [310.2225646972656, 416.8152770996094, 327.72900390625, 434.5222473144531], "page": 12, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [142.5128631591797, 288.7432556152344, 251.5660858154297, 416.8715515136719], "page": 13, "span": [0, 0], "__ref_s3_data": null}], 
"text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [145.66848754882812, 156.9557647705078, 251.9022216796875, 264.6454772949219], "page": 13, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.954833984375, 607.9212036132812, 249.82046508789062, 714.8308715820312], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.70733642578125, 472.09759521484375, 251.4536895751953, 599.3330078125], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.8423614501953, 338.135009765625, 251.25462341308594, 447.3796691894531], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.7443389892578, 223.1272430419922, 249.32614135742188, 329.49169921875], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [141.80172729492188, 68.0157470703125, 250.79905700683594, 177.24551391601562], "page": 14, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [142.55819702148438, 599.4522094726562, 251.8630828857422, 714.5342407226562], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [142.5027618408203, 465.3537902832031, 252.00502014160156, 575.7843017578125], "page": 15, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.604942321777344, 572.5767211914062, 238.58961486816406, 722.0762939453125], "page": 17, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.91238403320312, 92.01444244384766, 491.48321533203125, 296.0242004394531], "page": 19, 
"span": [0, 55], "__ref_s3_data": null}], "text": "Figure 1-1 All-or-nothing access to the rows of a table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.976806640625, 103.38056945800781, 547.004638671875, 416.27178955078125], "page": 20, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 1-2 Existing row and column controls", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.40062713623047, 568.2523803710938, 238.8854217529297, 722.2219848632812], "page": 23, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.838199615478516, 576.9904174804688, 238.931640625, 721.8954467773438], "page": 29, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [137.5907440185547, 381.64495849609375, 545.1253051757812, 684.1454467773438], "page": 31, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 3-1 CREATE PERMISSION SQL statement", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.6407928466797, 377.9161682128906, 545.9530029296875, 672.5060424804688], "page": 32, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-2 CREATE MASK SQL statement", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.6437530517578, 444.2763977050781, 507.3736267089844, 714.1329345703125], "page": 33, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 3-3 ALTER PERMISSION and ALTER MASK SQL statements", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.75877380371094, 69.6054458618164, 515.210205078125, 291.2005310058594], "page": 33, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-4 ALTER TABLE SQL statement", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.23974609375, 195.40040588378906, 302.0709533691406, 408.1564025878906], "page": 35, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 3-5 Special registers and adopted authority", 
"type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.88389587402344, 421.3089294433594, 227.963134765625, 454.3084716796875], "page": 40, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 3-7 Number of employees", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.74125671386719, 302.87969970703125, 547.218505859375, 490.070068359375], "page": 42, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 3-9 Row permissions that are shown in System i Navigator", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.14590454101562, 622.0357055664062, 547.3065185546875, 696.4837036132812], "page": 44, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3-10 Column masks shown in System i Navigator", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.97254943847656, 145.7752227783203, 530.3119506835938, 364.4224548339844], "page": 44, "span": [0, 65], "__ref_s3_data": null}], "text": "Figure 3-11 Selecting the EMPLOYEES table from System i Navigator", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.05963134765625, 453.9345703125, 547.3814086914062, 684.5645751953125], "page": 45, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 3-12 RCAC enabled on the EMPLOYEES table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [137.14126586914062, 210.01055908203125, 225.73944091796875, 243.2476043701172], "page": 45, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 3-13 Count of EMPLOYEES by HR", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.89012145996094, 98.30632019042969, 220.545166015625, 130.6866912841797], "page": 45, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 3-14 Count of EMPLOYEES by a manager", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [137.26792907714844, 651.4454956054688, 226.28631591796875, 684.01416015625], "page": 46, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 3-15 Count of EMPLOYEES by 
an employee", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.83544921875, 540.1837768554688, 228.46414184570312, 571.7149658203125], "page": 46, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 3-16 Count of EMPLOYEES by a consultant", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.2381706237793, 572.0254516601562, 238.3625946044922, 722.2628173828125], "page": 53, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.172607421875, 304.9477844238281, 495.38751220703125, 561.9093017578125], "page": 55, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-1 Internet banking example", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.94985961914062, 490.8873291015625, 451.76129150390625, 664.9793090820312], "page": 58, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-4 Data model of the banking scenario", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.25625610351562, 429.6432189941406, 417.9416198730469, 614.1981201171875], "page": 59, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-6 CUSTOMERS table attributes", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.90573120117188, 486.3748779296875, 533.0628051757812, 660.2156372070312], "page": 60, "span": [0, 59], "__ref_s3_data": null}], "text": "Figure 4-8 Reviewing the constraints on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.9095458984375, 230.82260131835938, 456.8887939453125, 420.815185546875], "page": 60, "span": [0, 36], "__ref_s3_data": null}], "text": "Figure 4-9 ACCOUNTS table attributes", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.610107421875, 495.3265686035156, 451.55133056640625, 684.3562622070312], "page": 62, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-12 TRANSACTIONS table attributes", "type": "figure", "bounding-box": null}, {"prov": 
[{"bbox": [136.1991729736328, 300.0424499511719, 344.15631103515625, 537.9190673828125], "page": 63, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-15 Application administration", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.1958465576172, 402.66937255859375, 527.11962890625, 684.6141967773438], "page": 64, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-16 Application administration for IBM i", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.93495178222656, 180.69932556152344, 528.1470947265625, 337.62982177734375], "page": 64, "span": [0, 77], "__ref_s3_data": null}], "text": "Figure 4-17 Customizing the Database Security Administrator function usage ID", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.7631378173828, 389.99383544921875, 485.8440246582031, 672.9370727539062], "page": 65, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-18 Customize Access window", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.96632385253906, 208.6089324951172, 483.9786682128906, 321.49188232421875], "page": 65, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-19 Function usage ID Database Security Administrator customized", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.90785217285156, 204.0828094482422, 358.8966369628906, 350.76531982421875], "page": 66, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 4-21 Creating group profiles", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.19943237304688, 430.79339599609375, 474.2098388671875, 656.1209716796875], "page": 67, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 4-22 Creating group profiles and adding users", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.22731018066406, 241.38636779785156, 269.4501037597656, 366.38726806640625], "page": 67, "span": [0, 40], "__ref_s3_data": null}], "text": "Figure 4-23 Newly created group profiles", "type": 
"figure", "bounding-box": null}, {"prov": [{"bbox": [135.93331909179688, 386.9024658203125, 455.8020324707031, 596.5308837890625], "page": 68, "span": [0, 38], "__ref_s3_data": null}], "text": "Figure 4-24 Creating a global variable", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.36389923095703, 83.51372528076172, 546.5014038085938, 298.5506896972656], "page": 68, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-25 Creating a global variable called CUSTOMER_LOGIN_ID", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.95562744140625, 553.2886352539062, 346.0622863769531, 672.4024047851562], "page": 69, "span": [0, 72], "__ref_s3_data": null}], "text": "Figure 4-26 Setting permissions on the CUSTOMER_LOGIN_ID global variable", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.1831817626953, 216.0868377685547, 547.692138671875, 488.07440185546875], "page": 69, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 4-27 Setting change permissions for Webuser on the CUSTOMER_LOGIN_ID global variable", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.78514099121094, 371.8719177246094, 467.8066711425781, 630.3035888671875], "page": 70, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-28 Selecting new row permissions", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.4442901611328, 264.9051513671875, 508.3151550292969, 533.2034301757812], "page": 71, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 4-29 New row permissions on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.61784362792969, 209.0849609375, 540.3091430664062, 520.9561767578125], "page": 72, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 4-30 New row permissions on the ACCOUNTS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.03852844238281, 80.2489013671875, 536.1629638671875, 444.4478759765625], "page": 73, "span": [0, 57], 
"__ref_s3_data": null}], "text": "Figure 4-31 New row permissions on the TRANSACTIONS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.7589111328125, 520.03076171875, 545.423095703125, 672.0924072265625], "page": 74, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 4-32 List of row permissions on BANK_SCHEMA", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.82342529296875, 222.84324645996094, 457.5223388671875, 396.602783203125], "page": 74, "span": [0, 34], "__ref_s3_data": null}], "text": "Figure 4-33 Creating a column mask", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.43399047851562, 188.49737548828125, 533.2398681640625, 602.9105834960938], "page": 75, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-34 Defining a column mask on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.82819747924805, 610.9564819335938, 547.4437255859375, 684.7066040039062], "page": 76, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-35 List of column masks on BANK_SCHEMA", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.8809814453125, 322.705810546875, 544.9537963867188, 430.2528991699219], "page": 76, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-36 Definition of the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.15058898925781, 191.338623046875, 546.9588623046875, 258.4830322265625], "page": 76, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-37 Adding a check constraint", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.886878967285156, 141.33029174804688, 543.0753173828125, 622.1634521484375], "page": 77, "span": [0, 68], "__ref_s3_data": null}], "text": "Figure 4-38 Specifying a new check constraint on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.629364013671875, 407.89910888671875, 547.2354125976562, 684.9134521484375], 
"page": 78, "span": [0, 51], "__ref_s3_data": null}], "text": "Figure 4-39 Check constraint on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.99116897583008, 192.2274627685547, 543.6686401367188, 330.8082580566406], "page": 78, "span": [0, 60], "__ref_s3_data": null}], "text": "Figure 4-40 List of check constraints on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.2755584716797, 458.7856140136719, 546.0454711914062, 618.2598876953125], "page": 79, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 4-41 Enabling RCAC on the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.9549560546875, 218.64797973632812, 534.6990356445312, 382.1351623535156], "page": 79, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-42 Enabling RCAC on ACCOUNTS", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.10528564453125, 515.156494140625, 547.2327880859375, 672.4461059570312], "page": 80, "span": [0, 41], "__ref_s3_data": null}], "text": "Figure 4-43 Enabling RCAC on TRANSACTIONS", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.80467987060547, 210.67916870117188, 546.7496948242188, 379.74896240234375], "page": 80, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-44 Row permissions after enabling RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.10069274902344, 553.5065307617188, 467.5866394042969, 684.43359375], "page": 81, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-45 Selecting row permission definition", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.4017105102539, 196.437255859375, 502.2218017578125, 464.38470458984375], "page": 81, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 4-46 Search condition of the QIBM_DEFAULT row permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.19024658203125, 106.22469329833984, 
354.0876770019531, 227.25241088867188], "page": 83, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 4-50 SECURITY session user", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.16693115234375, 353.8003845214844, 546.63623046875, 501.63983154296875], "page": 84, "span": [0, 71], "__ref_s3_data": null}], "text": "Figure 4-52 SQL statement that is run by the SECURITY user - no results", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.6452178955078, 572.7899780273438, 389.1011047363281, 684.0476684570312], "page": 85, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4-54 Number of rows that the TELLER user can see in the CUSTOMERS table", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.4497833251953, 569.8001098632812, 352.79534912109375, 652.5623779296875], "page": 86, "span": [0, 30], "__ref_s3_data": null}], "text": "Figure 4-56 ADMIN session user", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.02879333496094, 117.78546905517578, 354.0118713378906, 207.25164794921875], "page": 87, "span": [0, 32], "__ref_s3_data": null}], "text": "Figure 4-59 WEBUSER session user", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.8178253173828, 570.9151611328125, 547.5137329101562, 672.1438598632812], "page": 88, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 4-60 Setting the global variable CUSTOMER_LOGIN_ID", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.01531982421875, 272.7074890136719, 396.3285217285156, 505.56903076171875], "page": 88, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 4-61 Viewing the global variable value", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.8096160888672, 107.80245971679688, 381.8302307128906, 207.7640838623047], "page": 88, "span": [0, 74], "__ref_s3_data": null}], "text": "Figure 4-62 Number of rows that the WEBUSER can see in the CUSTOMERS table", "type": "figure", "bounding-box": 
null}, {"prov": [{"bbox": [136.06761169433594, 293.2961120605469, 400.2455139160156, 655.4537963867188], "page": 92, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 4-67 Visual Explain with no RCAC enabled", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.07376098632812, 314.4920349121094, 545.792724609375, 672.5439453125], "page": 93, "span": [0, 44], "__ref_s3_data": null}], "text": "Figure 4-68 Visual Explain with RCAC enabled", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.46057891845703, 127.6042709350586, 506.0116271972656, 238.0028076171875], "page": 93, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 4-69 Index advice with no RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.14990997314453, 556.6690063476562, 508.7397766113281, 672.6431884765625], "page": 94, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 4-70 Index advice with RCAC enabled", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.52901077270508, 567.5162963867188, 238.8768310546875, 722.1326904296875], "page": 95, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.02496337890625, 71.25074005126953, 527.1937866210938, 354.32598876953125], "page": 97, "span": [0, 56], "__ref_s3_data": null}], "text": "Figure 5-1 Accidental update with masked values scenario", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.44626235961914, 566.1923828125, 238.71536254882812, 721.9947509765625], "page": 101, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.21511840820312, 262.2598876953125, 413.1637878417969, 536.272216796875], "page": 103, "span": [0, 57], "__ref_s3_data": null}], "text": "Figure 6-2 Masking differences between Fieldproc and RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.0615234375, 502.3581237792969, 497.5985412597656, 
616.2451782226562], "page": 104, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-3 RCAC and data movement", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.62368774414062, 386.83355712890625, 491.8061828613281, 636.6663208007812], "page": 105, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 6-4 RCAC effects on data movement from SOURCE", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.34181213378906, 380.9286193847656, 499.1208190917969, 636.5926513671875], "page": 106, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-5 RCAC effects on data movement on TARGET", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.50173950195312, 406.5937805175781, 501.6983947753906, 660.2413330078125], "page": 107, "span": [0, 61], "__ref_s3_data": null}], "text": "Figure 6-6 RCAC effects on data movement on SOURCE and TARGET", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.9111785888672, 475.1238708496094, 503.0137634277344, 684.5241088867188], "page": 108, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-7 Set A and set B with row permissions", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.1741485595703, 169.27420043945312, 465.0877990722656, 381.4338073730469], "page": 108, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-8 Inner join without RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.10928344726562, 371.35198974609375, 470.06805419921875, 604.3631591796875], "page": 109, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-9 Inner join with RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.9721221923828, 419.20843505859375, 478.57989501953125, 634.5538940429688], "page": 110, "span": [0, 46], "__ref_s3_data": null}], "text": "Figure 6-10 Outer join without RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.8201904296875, 
370.0613708496094, 483.43121337890625, 608.5155639648438], "page": 111, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 6-11 Outer join with RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.06724548339844, 397.1817626953125, 484.16094970703125, 635.3245239257812], "page": 112, "span": [0, 50], "__ref_s3_data": null}], "text": "Figure 6-12 Exception join without RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.15040588378906, 72.79753875732422, 485.8376770019531, 308.3733825683594], "page": 112, "span": [0, 47], "__ref_s3_data": null}], "text": "Figure 6-13 Exception join with RCAC permission", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.894283294677734, 442.3710021972656, 546.3536376953125, 696.03125], "page": 114, "span": [0, 58], "__ref_s3_data": null}], "text": "Figure 6-14 Visual Explain indicating that RCAC is applied", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.08663177490234, 249.16531372070312, 546.7911376953125, 389.9174499511719], "page": 114, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-15 SQL Performance Monitor", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.8217658996582, 106.15225982666016, 546.2924194335938, 184.86444091796875], "page": 114, "span": [0, 67], "__ref_s3_data": null}], "text": "Figure 6-16 SQL Performance Monitor indicating that RCAC is applied", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.91090393066406, 573.9140625, 547.3914794921875, 684.7344360351562], "page": 115, "span": [0, 63], "__ref_s3_data": null}], "text": "Figure 6-17 SQL Performance Monitor showing statements and RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.14181518554688, 412.9127197265625, 547.3031616210938, 684.6536254882812], "page": 116, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 6-18 Index advice and RCAC", "type": "figure", "bounding-box": null}, {"prov": 
[{"bbox": [64.29962921142578, 236.79556274414062, 510.150390625, 348.3416442871094], "page": 116, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-19 Index advisor based on the RCAC rule", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.72079467773438, 260.0303955078125, 491.8398132324219, 529.7687377929688], "page": 118, "span": [0, 42], "__ref_s3_data": null}], "text": "Figure 6-21 View definition and user query", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.38958740234375, 402.56231689453125, 509.0123291015625, 684.0966186523438], "page": 119, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 6-22 Query rewrite with RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.09100341796875, 390.8775634765625, 505.8097229003906, 660.002197265625], "page": 122, "span": [0, 45], "__ref_s3_data": null}], "text": "Figure 6-23 Native record access with no RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.10159301757812, 386.76556396484375, 513.560791015625, 660.6875610351562], "page": 123, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6-24 Native record level access with RCAC", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.8210906982422, 230.38075256347656, 509.4689636230469, 516.169677734375], "page": 127, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 6-25 Object-level security and RCAC permissions", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.58393096923828, 573.9718627929688, 238.72312927246094, 722.0296630859375], "page": 129, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.06692504882812, 338.255859375, 523.4048461914062, 576.7738037109375], "page": 132, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 7-1 Restoring tables to different schemas", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.37837600708008, 
571.7457885742188, 238.6834716796875, 721.9522705078125], "page": 135, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.35563278198242, 567.583984375, 238.68637084960938, 721.7655639648438], "page": 137, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [475.0350341796875, 648.4136962890625, 547.1314697265625, 720.0004272460938], "page": 137, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [485.2432861328125, 737.3182983398438, 566.4600219726562, 766.057373046875], "page": 146, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [474.6000061035156, 602.410400390625, 592.139892578125, 712.2808837890625], "page": 146, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}], "tables": [{"prov": [{"bbox": [134.7429962158203, 76.51283264160156, 549.8472290039062, 660.2257080078125], "page": 5, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 43, "data": [[{"bbox": [136.8000030517578, 650.1383666992188, 172.89404296875, 659.3513793945312], "spans": [[0, 0]], "text": "Notices", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [175.01951599121094, 650.1383666992188, 547.1898193359375, 659.3513793945312], "spans": [[0, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
vii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.79901123046875, 637.6585083007812, 189.86537170410156, 646.8715209960938], "spans": [[1, 0]], "text": "Trademarks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [195.3968505859375, 637.6585083007812, 547.182861328125, 646.8715209960938], "spans": [[1, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . viii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.79901123046875, 615.1588745117188, 279.3973083496094, 624.3718872070312], "spans": [[2, 0]], "text": "DB2 for i Center of Excellence", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [280.6194152832031, 615.1588745117188, 547.1907958984375, 624.3718872070312], "spans": [[2, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ix", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.79901123046875, 592.6592407226562, 172.84423828125, 601.8722534179688], "spans": [[3, 0]], "text": "Preface", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [175.01852416992188, 592.6592407226562, 547.182861328125, 601.8722534179688], "spans": [[3, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
xi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [136.79803466796875, 580.1793823242188, 547.1808471679688, 589.3923950195312], "spans": [[4, 0]], "text": "Authors . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xi", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.79803466796875, 567.6397705078125, 339.18292236328125, 576.852783203125], "spans": [[5, 0]], "text": "Now you can become a published author, too!", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [344.714111328125, 567.6397705078125, 547.1387939453125, 576.852783203125], "spans": [[5, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [136.79803466796875, 555.159912109375, 529.9950561523438, 564.3729248046875], "spans": [[6, 0]], "text": "Comments welcome. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [535.5494995117188, 555.159912109375, 547.1978759765625, 564.3729248046875], "spans": [[6, 1]], "text": "xiii", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [136.79806518554688, 542.6800537109375, 284.0286560058594, 551.89306640625], "spans": [[7, 0]], "text": "Stay connected to IBM Redbooks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [289.54449462890625, 542.6800537109375, 547.1211547851562, 551.89306640625], "spans": [[7, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiv", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [136.79806518554688, 520.180419921875, 536.0958862304688, 529.3934326171875], "spans": [[8, 0]], "text": "Chapter 1. Securing and protecting IBM DB2 data . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [541.6468505859375, 520.180419921875, 547.1978149414062, 529.3934326171875], "spans": [[8, 1]], "text": "1", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [136.79808044433594, 508.18060302734375, 549.8472290039062, 517.3936157226562], "spans": [[9, 0]], "text": "1.1 Security fundamentals. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [136.79806518554688, 495.6409606933594, 536.1293334960938, 504.85394287109375], "spans": [[10, 0]], "text": "1.2 Current state of IBM i security . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [541.6611328125, 495.6409606933594, 547.19287109375, 504.85394287109375], "spans": [[10, 1]], "text": "2", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [136.79806518554688, 483.16107177734375, 549.8472290039062, 492.3740539550781], "spans": [[11, 0]], "text": "1.3 DB2 for i security controls . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [151.19720458984375, 470.6811828613281, 536.0551147460938, 479.8941650390625], "spans": [[12, 0]], "text": "1.3.1 Existing row and column control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [541.6015014648438, 470.6811828613281, 547.14794921875, 479.8941650390625], "spans": [[12, 1]], "text": "4", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [151.19720458984375, 458.14154052734375, 536.080078125, 467.3545227050781], "spans": [[13, 0]], "text": "1.3.2 New controls: Row and Column Access Control. . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [541.635498046875, 458.14154052734375, 547.19091796875, 467.3545227050781], "spans": [[13, 1]], "text": "5", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [136.7970428466797, 435.64190673828125, 536.0908813476562, 444.8548889160156], "spans": [[14, 0]], "text": "Chapter 2. Roles and separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [541.642822265625, 435.64190673828125, 547.1947631835938, 444.8548889160156], "spans": [[14, 1]], "text": "7", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.7970428466797, 423.64208984375, 536.1271362304688, 432.8550720214844], "spans": [[15, 0]], "text": "2.1 Roles . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [541.6658935546875, 423.64208984375, 547.2047119140625, 432.8550720214844], "spans": [[15, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [151.19720458984375, 411.1622009277344, 535.9526977539062, 420.37518310546875], "spans": [[16, 0]], "text": "2.1.1 DDM and DRDA application server access: QIBM_DB_DDMDRDA . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [541.5558471679688, 411.1622009277344, 547.1590576171875, 420.37518310546875], "spans": [[16, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [151.19720458984375, 398.68231201171875, 536.0410766601562, 407.8952941894531], "spans": [[17, 0]], "text": "2.1.2 Toolbox application server access: QIBM_DB_ZDA. . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [541.595947265625, 398.68231201171875, 547.1508178710938, 407.8952941894531], "spans": [[17, 1]], "text": "8", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": [151.19720458984375, 386.1426696777344, 536.0748901367188, 395.35565185546875], "spans": [[18, 0]], "text": "2.1.3 Database Administrator function: QIBM_DB_SQLADM . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [541.6302490234375, 386.1426696777344, 547.1856079101562, 395.35565185546875], "spans": [[18, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [151.19720458984375, 373.66278076171875, 411.2704772949219, 382.8757629394531], "spans": [[19, 0]], "text": "2.1.4 Database Information function: QIBM_DB_SYSMON", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [416.8177490234375, 373.66278076171875, 547.1786499023438, 382.8757629394531], "spans": [[19, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . 9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [151.19720458984375, 361.1828918457031, 536.035888671875, 370.3958740234375], "spans": [[20, 0]], "text": "2.1.5 Security Administrator function: QIBM_DB_SECADM . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [541.5989379882812, 361.1828918457031, 547.1619262695312, 370.3958740234375], "spans": [[20, 1]], "text": "9", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": [151.19720458984375, 348.64324951171875, 530.5731811523438, 357.8562316894531], "spans": [[21, 0]], "text": "2.1.6 Change Function Usage CL command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [536.1044311523438, 348.64324951171875, 547.1668701171875, 357.8562316894531], "spans": [[21, 1]], "text": "10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}], [{"bbox": [151.19720458984375, 336.1633605957031, 530.5352172851562, 345.3763427734375], "spans": [[22, 0]], "text": "2.1.7 Verifying function usage IDs for RCAC with the FUNCTION_USAGE view . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 22, "row-header": false, "row-span": [22, 23]}, {"bbox": [536.0755004882812, 336.1633605957031, 547.156005859375, 345.3763427734375], "spans": [[22, 1]], "text": "10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 22, "row-header": false, "row-span": [22, 23]}], [{"bbox": [136.7970428466797, 323.6834716796875, 547.256591796875, 332.8964538574219], "spans": [[23, 0]], "text": "2.2 Separation of duties . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 23, "row-header": false, "row-span": [23, 24]}, {"bbox": null, "spans": [[23, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 23, "row-header": false, "row-span": [23, 24]}], [{"bbox": [136.79702758789062, 301.183837890625, 530.5396118164062, 310.3968200683594], "spans": [[24, 0]], "text": "Chapter 3. Row and Column Access Control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 24, "row-header": false, "row-span": [24, 25]}, {"bbox": [536.0916748046875, 301.183837890625, 547.19580078125, 310.3968200683594], "spans": [[24, 1]], "text": "13", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 24, "row-header": false, "row-span": [24, 25]}], [{"bbox": [136.79702758789062, 289.18402099609375, 530.4808959960938, 298.3970031738281], "spans": [[25, 0]], "text": "3.1 Explanation of RCAC and the concept of access control . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 25, "row-header": false, "row-span": [25, 26]}, {"bbox": [536.04248046875, 289.18402099609375, 547.1657104492188, 298.3970031738281], "spans": [[25, 1]], "text": "14", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 25, "row-header": false, "row-span": [25, 26]}], [{"bbox": [151.1971893310547, 276.6443786621094, 378.2078552246094, 285.85736083984375], "spans": [[26, 0]], "text": "3.1.1 Row permission and column mask definitions", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 26, "row-header": false, "row-span": [26, 27]}, {"bbox": [383.74713134765625, 276.6443786621094, 547.15576171875, 285.85736083984375], "spans": [[26, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . 14", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 26, "row-header": false, "row-span": [26, 27]}], [{"bbox": [151.1971893310547, 264.16448974609375, 530.4347534179688, 273.3774719238281], "spans": [[27, 0]], "text": "3.1.2 Enabling and activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 27, "row-header": false, "row-span": [27, 28]}, {"bbox": [535.9962158203125, 264.16448974609375, 547.1190795898438, 273.3774719238281], "spans": [[27, 1]], "text": "16", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 27, "row-header": false, "row-span": [27, 28]}], [{"bbox": [136.79702758789062, 251.6248321533203, 530.528076171875, 260.83782958984375], "spans": [[28, 0]], "text": "3.2 Special registers and built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 28, "row-header": false, "row-span": [28, 29]}, {"bbox": [536.0670166015625, 251.6248321533203, 547.1448364257812, 260.83782958984375], "spans": [[28, 1]], "text": "18", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 28, "row-header": false, "row-span": [28, 29]}], [{"bbox": [151.1971893310547, 239.14495849609375, 530.4978637695312, 248.3579559326172], "spans": [[29, 0]], "text": "3.2.1 Special registers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 29, "row-header": false, "row-span": [29, 30]}, {"bbox": [536.0518798828125, 239.14495849609375, 547.159912109375, 248.3579559326172], "spans": [[29, 1]], "text": "18", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 29, "row-header": false, "row-span": [29, 30]}], [{"bbox": [151.1971893310547, 226.6650848388672, 530.5602416992188, 235.87808227539062], "spans": [[30, 0]], "text": "3.2.2 Built-in global variables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 30, "row-header": false, "row-span": [30, 31]}, {"bbox": [536.09912109375, 226.6650848388672, 547.1768798828125, 235.87808227539062], "spans": [[30, 1]], "text": "19", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 30, "row-header": false, "row-span": [30, 31]}], [{"bbox": [136.79702758789062, 214.1254425048828, 530.5302734375, 223.33843994140625], "spans": [[31, 0]], "text": "3.3 VERIFY_GROUP_FOR_USER function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 31, "row-header": false, "row-span": [31, 32]}, {"bbox": [536.0615234375, 214.1254425048828, 547.1240234375, 223.33843994140625], "spans": [[31, 1]], "text": "20", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 31, "row-header": false, "row-span": [31, 32]}], [{"bbox": [136.79702758789062, 201.64556884765625, 530.6299438476562, 210.8585662841797], "spans": [[32, 0]], "text": "3.4 Establishing and controlling accessibility by using the RCAC rule text . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 32, "row-header": false, "row-span": [32, 33]}, {"bbox": [536.1631469726562, 201.64556884765625, 547.2295532226562, 210.8585662841797], "spans": [[32, 1]], "text": "21", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 32, "row-header": false, "row-span": [32, 33]}], [{"bbox": [136.79701232910156, 189.1656951904297, 394.78179931640625, 198.37869262695312], "spans": [[33, 0]], "text": "3.5 SELECT, INSERT, and UPDATE behavior with RCAC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 33, "row-header": false, "row-span": [33, 34]}, {"bbox": [400.3206481933594, 189.1656951904297, 547.10009765625, 198.37869262695312], "spans": [[33, 1]], "text": ". . . . . . . . . . . 
. . . . . . . . . . . . . 22", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 33, "row-header": false, "row-span": [33, 34]}], [{"bbox": [136.79701232910156, 176.6260528564453, 530.5651245117188, 185.83905029296875], "spans": [[34, 0]], "text": "3.6 Human resources example . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 34, "row-header": false, "row-span": [34, 35]}, {"bbox": [536.1119995117188, 176.6260528564453, 547.2057495117188, 185.83905029296875], "spans": [[34, 1]], "text": "22", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 34, "row-header": false, "row-span": [34, 35]}], [{"bbox": [151.19717407226562, 164.14617919921875, 530.4913940429688, 173.3591766357422], "spans": [[35, 0]], "text": "3.6.1 Assigning the QIBM_DB_SECADM function ID to the consultants. . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 35, "row-header": false, "row-span": [35, 36]}, {"bbox": [536.0463256835938, 164.14617919921875, 547.1561889648438, 173.3591766357422], "spans": [[35, 1]], "text": "23", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 35, "row-header": false, "row-span": [35, 36]}], [{"bbox": [151.19717407226562, 151.6663055419922, 530.5645751953125, 160.87930297851562], "spans": [[36, 0]], "text": "3.6.2 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 36, "row-header": false, "row-span": [36, 37]}, {"bbox": [536.0960083007812, 151.6663055419922, 547.1587524414062, 160.87930297851562], "spans": [[36, 1]], "text": "23", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 36, "row-header": false, "row-span": [36, 37]}], [{"bbox": [151.19717407226562, 139.1266632080078, 530.5569458007812, 148.33966064453125], "spans": [[37, 0]], "text": "3.6.3 Demonstrating data access without RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 37, "row-header": false, "row-span": [37, 38]}, {"bbox": [536.0881958007812, 139.1266632080078, 547.1507568359375, 148.33966064453125], "spans": [[37, 1]], "text": "24", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 37, "row-header": false, "row-span": [37, 38]}], [{"bbox": [151.19717407226562, 126.64678955078125, 530.5341186523438, 135.8597869873047], "spans": [[38, 0]], "text": "3.6.4 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 38, "row-header": false, "row-span": [38, 39]}, {"bbox": [536.072998046875, 126.64678955078125, 547.15087890625, 135.8597869873047], "spans": [[38, 1]], "text": "25", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 38, "row-header": false, "row-span": [38, 39]}], [{"bbox": [151.19717407226562, 114.16690826416016, 339.4510498046875, 123.37991333007812], "spans": [[39, 0]], "text": "3.6.5 Defining and creating column masks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 39, "row-header": false, "row-span": [39, 40]}, {"bbox": [344.9899597167969, 114.16690826416016, 547.160888671875, 123.37991333007812], "spans": [[39, 1]], "text": ". . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . 26", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 39, "row-header": false, "row-span": [39, 40]}], [{"bbox": [151.19717407226562, 101.62727355957031, 530.541015625, 110.84027099609375], "spans": [[40, 0]], "text": "3.6.6 Activating RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 40, "row-header": false, "row-span": [40, 41]}, {"bbox": [536.087646484375, 101.62727355957031, 547.1808471679688, 110.84027099609375], "spans": [[40, 1]], "text": "28", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 40, "row-header": false, "row-span": [40, 41]}], [{"bbox": [151.19717407226562, 89.14738464355469, 530.5750732421875, 98.36038970947266], "spans": [[41, 0]], "text": "3.6.7 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 41, "row-header": false, "row-span": [41, 42]}, {"bbox": [536.1066284179688, 89.14738464355469, 547.169677734375, 98.36038970947266], "spans": [[41, 1]], "text": "29", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 41, "row-header": false, "row-span": [41, 42]}], [{"bbox": [151.19717407226562, 76.6675033569336, 530.436279296875, 85.88050842285156], "spans": [[42, 0]], "text": "3.6.8 Demonstrating data access with a view and RCAC . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 42, "row-header": false, "row-span": [42, 43]}, {"bbox": [535.9984741210938, 76.6675033569336, 547.1228637695312, 85.88050842285156], "spans": [[42, 1]], "text": "32", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 42, "row-header": false, "row-span": [42, 43]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [132.29698181152344, 56.788063049316406, 547.6959228515625, 721.551025390625], "page": 6, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 49, "data": [[{"bbox": [136.8000030517578, 711.2783203125, 530.5958862304688, 720.4913330078125], "spans": [[0, 0], [0, 1]], "text": "Chapter 4. Implementing Row and Column Access Control: Banking example . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [536.1328125, 711.2783203125, 547.2067260742188, 720.4913330078125], "spans": [[0, 1]], "text": "37", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.80001831054688, 699.2785034179688, 530.5200805664062, 708.4915161132812], "spans": [[1, 0], [1, 1]], "text": "4.1 Business requirements for the RCAC banking scenario . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [536.0591430664062, 699.2785034179688, 547.13720703125, 708.4915161132812], "spans": [[1, 1]], "text": "38", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.80001831054688, 686.7986450195312, 530.5469970703125, 696.0116577148438], "spans": [[2, 0], [2, 1]], "text": "4.2 Description of the users roles and responsibilities . . . . . . . . . . . . . . . . . . 
. . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [536.0863037109375, 686.7986450195312, 547.1648559570312, 696.0116577148438], "spans": [[2, 1]], "text": "39", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.80001831054688, 674.259033203125, 530.5362548828125, 683.4720458984375], "spans": [[3, 0], [3, 1]], "text": "4.3 Implementation of RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [536.0903930664062, 674.259033203125, 547.19873046875, 683.4720458984375], "spans": [[3, 1]], "text": "42", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [151.20018005371094, 661.7791748046875, 400.5744323730469, 670.9921875], "spans": [[4, 0], [4, 1]], "text": "4.3.1 Reviewing the tables that are used in this example", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [406.10546875, 661.7791748046875, 547.14697265625, 670.9921875], "spans": [[4, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . 
42", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [151.20018005371094, 649.29931640625, 516.9255981445312, 658.5123291015625], "spans": [[5, 0], [5, 1]], "text": "4.3.2 Assigning function ID QIBM_DB_SECADM to the Database Engineers group", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [522.4603881835938, 649.29931640625, 547.3670654296875, 658.5123291015625], "spans": [[5, 1]], "text": ". . 47", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [151.20018005371094, 636.7597045898438, 530.5675659179688, 645.9727172851562], "spans": [[6, 0], [6, 1]], "text": "4.3.3 Creating group profiles for the users and their roles . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [536.0989379882812, 636.7597045898438, 547.1617431640625, 645.9727172851562], "spans": [[6, 1]], "text": "50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": null, "spans": [[7, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [417.15240478515625, 624.2798461914062, 547.1438598632812, 633.4928588867188], "spans": [[7, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . 52", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [151.20018005371094, 611.7999877929688, 530.5370483398438, 621.0130004882812], "spans": [[8, 0], [8, 1]], "text": "4.3.5 Defining and creating row permissions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [536.0759887695312, 611.7999877929688, 547.1538696289062, 621.0130004882812], "spans": [[8, 1]], "text": "54", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [151.20018005371094, 599.2603759765625, 339.45404052734375, 608.473388671875], "spans": [[9, 0], [9, 1]], "text": "4.3.6 Defining and creating column masks", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [536.0859985351562, 599.2603759765625, 547.1638793945312, 608.473388671875], "spans": [[9, 1]], "text": "58", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [151.20018005371094, 586.780517578125, 530.5470581054688, 608.473388671875], "spans": [[10, 0], [10, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.7 Restricting the inserting and updating of masked data . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [536.078125, 586.780517578125, 547.15576171875, 595.9935302734375], "spans": [[10, 1]], "text": "60", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": null, "spans": [[11, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [536.0916137695312, 514.3016967773438, 547.1978149414062, 523.5147094726562], "spans": [[11, 1]], "text": "79", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [151.20018005371094, 561.7610473632812, 530.4820556640625, 583.513671875], "spans": [[12, 0], [12, 1]], "text": "4.3.8 Activating row and column access control . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4.3.9 Reviewing row permissions. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [536.036376953125, 561.7610473632812, 547.14501953125, 570.9740600585938], "spans": [[12, 1]], "text": "64", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [151.20018005371094, 549.2811889648438, 530.44921875, 558.4942016601562], "spans": [[13, 0], [13, 1]], "text": "4.3.10 Demonstrating data access with RCAC . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [536.0194702148438, 549.2811889648438, 547.1600341796875, 558.4942016601562], "spans": [[13, 1]], "text": "66", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [151.20018005371094, 536.8013305664062, 530.4335327148438, 546.0143432617188], "spans": [[14, 0], [14, 1]], "text": "4.3.11 Query implementation with RCAC activated . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [536.0037231445312, 536.8013305664062, 547.1441040039062, 546.0143432617188], "spans": [[14, 1]], "text": "75", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.80001831054688, 514.3016967773438, 530.5385131835938, 523.5147094726562], "spans": [[15, 0], [15, 1]], "text": "Chapter 5. RCAC and non-SQL interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [536.099609375, 502.3018798828125, 547.1768798828125, 511.5148620605469], "spans": [[15, 1]], "text": "80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [136.80001831054688, 502.3018798828125, 530.5609741210938, 511.5148620605469], "spans": [[16, 0], [16, 1]], "text": "5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [136.80001831054688, 502.3018798828125, 530.5609741210938, 511.5148620605469], "spans": [[16, 0], [16, 1]], "text": "5.1 Unsupported interfaces . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [136.80001831054688, 477.2823486328125, 530.4937744140625, 498.9752197265625], "spans": [[17, 0], [17, 1]], "text": "5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [136.80001831054688, 477.2823486328125, 530.4937744140625, 498.9752197265625], "spans": [[17, 0], [17, 1]], "text": "5.2 Native query result differences . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.3 Accidental updates with masked values . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": null, "spans": [[18, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [536.0474853515625, 477.2823486328125, 547.1549072265625, 486.4953308105469], "spans": [[18, 1]], "text": "81", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [136.80001831054688, 464.8024597167969, 530.5643310546875, 474.01544189453125], "spans": [[19, 0], [19, 1]], "text": "5.4 System CL commands considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [536.0958251953125, 464.8024597167969, 547.158935546875, 474.01544189453125], "spans": [[19, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [151.20018005371094, 452.2628173828125, 530.4598999023438, 461.4757995605469], "spans": [[20, 0], [20, 1]], "text": "5.4.1 Create Duplicate Object (CRTDUPOBJ) command . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [536.0228271484375, 452.2628173828125, 547.148681640625, 461.4757995605469], "spans": [[20, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": null, "spans": [[21, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [536.0770874023438, 439.7829284667969, 547.1549682617188, 448.99591064453125], "spans": [[21, 1]], "text": "82", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}], [{"bbox": [151.20018005371094, 427.30303955078125, 530.5381469726562, 448.99591064453125], "spans": [[22, 0], [22, 1]], "text": "5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 22, "row-header": false, "row-span": [22, 23]}, {"bbox": [151.20018005371094, 427.30303955078125, 530.5381469726562, 448.99591064453125], "spans": [[22, 0], [22, 1]], "text": "5.4.2 Copy File (CPYF) command . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5.4.3 Copy Library (CPYLIB) command. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 22, "row-header": false, "row-span": [22, 23]}], [{"bbox": null, "spans": [[23, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 23, "row-header": false, "row-span": [23, 24]}, {"bbox": [536.0574340820312, 427.30303955078125, 547.182861328125, 436.5160217285156], "spans": [[23, 1]], "text": "83", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 23, "row-header": false, "row-span": [23, 24]}], [{"bbox": [136.80001831054688, 404.80340576171875, 530.5385131835938, 414.0163879394531], "spans": [[24, 0], [24, 1]], "text": "Chapter 6. Additional considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 24, "row-header": false, "row-span": [24, 25]}, {"bbox": [530.4888916015625, 142.78761291503906, 547.19970703125, 152.0006103515625], "spans": [[24, 1]], "text": "108", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 24, "row-header": false, "row-span": [24, 25]}], [{"bbox": [136.80003356933594, 380.2639465332031, 530.4944458007812, 389.4769287109375], "spans": [[25, 0], [25, 1]], "text": "6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 25, "row-header": false, "row-span": [25, 26]}, {"bbox": [136.80003356933594, 380.2639465332031, 530.4944458007812, 389.4769287109375], "spans": [[25, 0], [25, 1]], "text": "6.2 RCAC effects on data movement . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 25, "row-header": false, "row-span": [25, 26]}], [{"bbox": [405.7154541015625, 367.7840576171875, 530.5139770507812, 376.9970397949219], "spans": [[26, 0], [26, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 26, "row-header": false, "row-span": [26, 27]}, {"bbox": [536.0606079101562, 367.7840576171875, 547.15380859375, 376.9970397949219], "spans": [[26, 1]], "text": "88", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 26, "row-header": false, "row-span": [26, 27]}], [{"bbox": [151.2001953125, 367.7840576171875, 400.1688232421875, 376.9970397949219], "spans": [[27, 0], [27, 1]], "text": "6.2.1 Effects when RCAC is defined on the source table", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 27, "row-header": false, "row-span": [27, 28]}, {"bbox": [151.2001953125, 367.7840576171875, 400.1688232421875, 376.9970397949219], "spans": [[27, 0], [27, 1]], "text": "6.2.1 Effects when RCAC is defined on the source table", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 27, "row-header": false, "row-span": [27, 28]}], [{"bbox": [136.80003356933594, 330.2846374511719, 530.5997924804688, 351.9775085449219], "spans": [[28, 0], [28, 1]], "text": "6.2.3 Effects when RCAC is defined on both source and target tables . . . . . . . . . . . . . 6.3 RCAC effects on joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 28, "row-header": false, "row-span": [28, 29]}, {"bbox": [536.0498657226562, 330.2846374511719, 547.2177734375, 351.9775085449219], "spans": [[28, 1]], "text": "90 91", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 28, "row-header": false, "row-span": [28, 29]}], [{"bbox": [151.2001953125, 317.80474853515625, 547.2595825195312, 327.0177307128906], "spans": [[29, 0], [29, 1]], "text": "6.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 29, "row-header": false, "row-span": [29, 30]}, {"bbox": [151.2001953125, 317.80474853515625, 547.2595825195312, 327.0177307128906], "spans": [[29, 0], [29, 1]], "text": "6.3.1 Inner joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 29, "row-header": false, "row-span": [29, 30]}], [{"bbox": [151.20016479492188, 305.2651062011719, 547.2595825195312, 314.47808837890625], "spans": [[30, 0], [30, 1]], "text": "6.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 30, "row-header": false, "row-span": [30, 31]}, {"bbox": [151.20016479492188, 305.2651062011719, 547.2595825195312, 314.47808837890625], "spans": [[30, 0], [30, 1]], "text": "6.3.2 Outer joins. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
94", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 30, "row-header": false, "row-span": [30, 31]}], [{"bbox": [151.20016479492188, 292.78521728515625, 530.481201171875, 301.9981994628906], "spans": [[31, 0], [31, 1]], "text": "6.3.3 Exception joins . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 31, "row-header": false, "row-span": [31, 32]}, {"bbox": [536.051025390625, 292.78521728515625, 547.1907348632812, 301.9981994628906], "spans": [[31, 1]], "text": "96", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 31, "row-header": false, "row-span": [31, 32]}], [{"bbox": [136.8000030517578, 280.2455749511719, 372.92724609375, 289.45855712890625], "spans": [[32, 0], [32, 1]], "text": "6.4 Monitoring, analyzing, and debugging with RCAC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 32, "row-header": false, "row-span": [32, 33]}, {"bbox": [536.115966796875, 280.2455749511719, 547.1796264648438, 289.45855712890625], "spans": [[32, 1]], "text": "97", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 32, "row-header": false, "row-span": [32, 33]}], [{"bbox": [378.45904541015625, 280.2455749511719, 530.5841674804688, 289.45855712890625], "spans": [[33, 0], [33, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 33, "row-header": false, "row-span": [33, 34]}, {"bbox": [178.8563232421875, 267.76568603515625, 547.1707763671875, 276.9786682128906], "spans": [[33, 1]], "text": "Query monitoring and analysis tools . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
97", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 33, "row-header": false, "row-span": [33, 34]}], [{"bbox": [151.20016479492188, 255.28578186035156, 530.5306396484375, 264.498779296875], "spans": [[34, 0], [34, 1]], "text": "6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 34, "row-header": false, "row-span": [34, 35]}, {"bbox": [151.20016479492188, 255.28578186035156, 530.5306396484375, 264.498779296875], "spans": [[34, 0], [34, 1]], "text": "6.4.2 Index advisor. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 34, "row-header": false, "row-span": [34, 35]}], [{"bbox": [151.20013427734375, 242.7461395263672, 525.0111083984375, 251.95913696289062], "spans": [[35, 0], [35, 1]], "text": "6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 35, "row-header": false, "row-span": [35, 36]}, {"bbox": [151.20013427734375, 242.7461395263672, 525.0111083984375, 251.95913696289062], "spans": [[35, 0], [35, 1]], "text": "6.4.3 Metadata using catalogs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 35, "row-header": false, "row-span": [35, 36]}], [{"bbox": [136.7999725341797, 230.26626586914062, 524.8056640625, 239.47926330566406], "spans": [[36, 0], [36, 1]], "text": "6.5 Views, materialized query tables, and query rewrite with RCAC . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 36, "row-header": false, "row-span": [36, 37]}, {"bbox": [530.3905029296875, 230.26626586914062, 547.14501953125, 239.47926330566406], "spans": [[36, 1]], "text": "102", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 36, "row-header": false, "row-span": [36, 37]}], [{"bbox": [179.0886993408203, 205.2467498779297, 524.8568115234375, 214.45974731445312], "spans": [[37, 0], [37, 1]], "text": "Materialized query tables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 37, "row-header": false, "row-span": [37, 38]}, {"bbox": [530.4345092773438, 205.2467498779297, 547.1676635742188, 214.45974731445312], "spans": [[37, 1]], "text": "103", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 37, "row-header": false, "row-span": [37, 38]}], [{"bbox": [151.20013427734375, 205.2467498779297, 173.510986328125, 214.45974731445312], "spans": [[38, 0], [38, 1]], "text": "6.5.2", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 38, "row-header": false, "row-span": [38, 39]}, {"bbox": [530.5247192382812, 192.76687622070312, 547.1878051757812, 201.97987365722656], "spans": [[38, 1]], "text": "105", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 38, "row-header": false, "row-span": [38, 39]}], [{"bbox": [151.20013427734375, 192.76687622070312, 238.92100524902344, 201.97987365722656], "spans": [[39, 0], [39, 1]], "text": "6.5.3 Query rewrite", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 39, "row-header": false, "row-span": [39, 40]}, {"bbox": [151.20013427734375, 192.76687622070312, 238.92100524902344, 201.97987365722656], "spans": [[39, 0], [39, 1]], "text": "6.5.3 Query rewrite", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 
2], "row": 39, "row-header": false, "row-span": [39, 40]}], [{"bbox": [136.7999725341797, 180.28700256347656, 524.9227905273438, 189.5], "spans": [[40, 0], [40, 1]], "text": "6.6 RCAC effects on performance and scalability. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 40, "row-header": false, "row-span": [40, 41]}, {"bbox": [530.4845581054688, 180.28700256347656, 547.1698608398438, 189.5], "spans": [[40, 1]], "text": "105", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 40, "row-header": false, "row-span": [40, 41]}], [{"bbox": null, "spans": [[41, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 41, "row-header": false, "row-span": [41, 42]}, {"bbox": [530.43701171875, 167.7473602294922, 547.1228637695312, 176.96035766601562], "spans": [[41, 1]], "text": "107", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 41, "row-header": false, "row-span": [41, 42]}], [{"bbox": [136.7999725341797, 155.26748657226562, 525.0469970703125, 176.96035766601562], "spans": [[42, 0], [42, 1]], "text": "6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 42, "row-header": false, "row-span": [42, 43]}, {"bbox": [136.7999725341797, 155.26748657226562, 525.0469970703125, 176.96035766601562], "spans": [[42, 0], [42, 1]], "text": "6.7 Exclusive lock to implement RCAC (availability issues) . . . . . . . . . . . . . . . . . . . . . . . 6.8 Avoiding propagation of masked data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 42, "row-header": false, "row-span": [42, 43]}], [{"bbox": null, "spans": [[43, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 43, "row-header": false, "row-span": [43, 44]}, {"bbox": [530.5781860351562, 155.26748657226562, 547.1717529296875, 164.48048400878906], "spans": [[43, 1]], "text": "108", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 43, "row-header": false, "row-span": [43, 44]}], [{"bbox": null, "spans": [[44, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 44, "row-header": false, "row-span": [44, 45]}, {"bbox": [530.43359375, 117.7680892944336, 547.19677734375, 139.46096801757812], "spans": [[44, 1]], "text": "109 109", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 44, "row-header": false, "row-span": [44, 45]}], [{"bbox": null, "spans": [[45, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 45, "row-header": false, "row-span": [45, 46]}, {"bbox": [530.4365234375, 92.74856567382812, 547.2177124023438, 101.9615707397461], "spans": [[45, 1]], "text": "110", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 45, "row-header": false, "row-span": [45, 46]}], [{"bbox": null, "spans": [[46, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 46, "row-header": false, "row-span": [46, 47]}, {"bbox": [530.5514526367188, 57.76904296875, 547.2017211914062, 66.98204803466797], "spans": [[46, 1]], "text": "113", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 46, "row-header": false, "row-span": [46, 47]}], [{"bbox": null, "spans": [[47, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 47, "row-header": false, "row-span": [47, 48]}, {"bbox": [530.3995361328125, 
80.26868438720703, 547.1510009765625, 89.481689453125], "spans": [[47, 1]], "text": "111", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 47, "row-header": false, "row-span": [47, 48]}], [{"bbox": [136.79995727539062, 57.76904296875, 525.0014038085938, 66.98204803466797], "spans": [[48, 0], [48, 1]], "text": "Chapter 7. Row and Column Access Control management . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 2], "row": 48, "row-header": false, "row-span": [48, 49]}, {"bbox": [136.79995727539062, 57.76904296875, 525.0014038085938, 66.98204803466797], "spans": [[48, 0], [48, 1]], "text": "Chapter 7. Row and Column Access Control management . . . . . . . . . . . . . . . . . . . .", "type": "row_header", "col": 1, "col-header": false, "col-span": [0, 2], "row": 48, "row-header": false, "row-span": [48, 49]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [135.956298828125, 482.10968017578125, 547.4403686523438, 721.7639770507812], "page": 7, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 2, "#-rows": 17, "data": [[{"bbox": [136.79989624023438, 711.2779541015625, 524.9435424804688, 720.490966796875], "spans": [[0, 0]], "text": "7.1 Managing row permissions and column masks. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [530.4978637695312, 711.2779541015625, 547.1608276367188, 720.490966796875], "spans": [[0, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [151.20005798339844, 698.798095703125, 524.9487915039062, 708.0111083984375], "spans": [[1, 0]], "text": "7.1.1 Source management. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [530.510986328125, 698.798095703125, 547.1976318359375, 708.0111083984375], "spans": [[1, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [151.20005798339844, 686.2584838867188, 524.9796752929688, 695.4714965820312], "spans": [[2, 0]], "text": "7.1.2 Modifying definitions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [530.5341796875, 686.2584838867188, 547.1976928710938, 695.4714965820312], "spans": [[2, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [151.20005798339844, 673.7786254882812, 525.0257568359375, 682.9916381835938], "spans": [[3, 0]], "text": "7.1.3 Turning on and off . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [530.5559692382812, 673.7786254882812, 547.1466064453125, 682.9916381835938], "spans": [[3, 1]], "text": "114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [151.20005798339844, 661.2987670898438, 238.93310546875, 670.5117797851562], "spans": [[4, 0]], "text": "7.1.4 Regenerating", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [244.48696899414062, 661.2987670898438, 547.1724853515625, 670.5117797851562], "spans": [[4, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . 114", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.79989624023438, 648.7591552734375, 524.9177856445312, 657.97216796875], "spans": [[5, 0]], "text": "7.2 Managing tables with row permissions and column masks. . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [530.4720458984375, 648.7591552734375, 547.134765625, 657.97216796875], "spans": [[5, 1]], "text": "115", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [151.20005798339844, 636.279296875, 524.933349609375, 645.4923095703125], "spans": [[6, 0]], "text": "7.2.1 Save and restore. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [530.4951782226562, 636.279296875, 547.1807250976562, 645.4923095703125], "spans": [[6, 1]], "text": "115", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [151.20005798339844, 623.7994384765625, 524.9628295898438, 633.012451171875], "spans": [[7, 0]], "text": "7.2.2 Table migration . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [530.5087280273438, 623.7994384765625, 547.1466064453125, 633.012451171875], "spans": [[7, 1]], "text": "116", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [136.79989624023438, 611.2598266601562, 524.9552612304688, 620.4728393554688], "spans": [[8, 0]], "text": "7.3 Monitoring and auditing function usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [530.5089111328125, 611.2598266601562, 547.1697998046875, 620.4728393554688], "spans": [[8, 1]], "text": "117", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [136.79989624023438, 588.7601928710938, 362.6678466796875, 597.9732055664062], "spans": [[9, 0]], "text": "Chapter 8. Designing and planning for success", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [363.9595947265625, 588.7601928710938, 547.1986694335938, 597.9732055664062], "spans": [[9, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . 119", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [136.7998809814453, 576.7603759765625, 416.633056640625, 585.973388671875], "spans": [[10, 0]], "text": "8.1 Implementing RCAC with good design and proper planning", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [422.1871643066406, 576.7603759765625, 547.1546020507812, 585.973388671875], "spans": [[10, 1]], "text": ". . . . . . . . . . . . . . . . . . . 
120", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [136.7998809814453, 564.280517578125, 524.86376953125, 573.4935302734375], "spans": [[11, 0]], "text": "8.2 DB2 for i Center of Excellence . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [530.440185546875, 564.280517578125, 547.1694946289062, 573.4935302734375], "spans": [[11, 1]], "text": "120", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [136.7998809814453, 541.7808837890625, 447.0309753417969, 550.993896484375], "spans": [[12, 0]], "text": "Appendix A. Database definitions for the RCAC banking example", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [447.35968017578125, 541.7808837890625, 547.2036743164062, 550.993896484375], "spans": [[12, 1]], "text": ". . . . . . . . . . . . . . 121", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [136.79989624023438, 519.7612915039062, 234.45175170898438, 528.9743041992188], "spans": [[13, 0]], "text": "Related publications", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [236.15985107421875, 519.7612915039062, 547.1917114257812, 528.9743041992188], "spans": [[13, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [136.79989624023438, 507.2814025878906, 217.75054931640625, 516.494384765625], "spans": [[14, 0]], "text": "Other publications", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [223.33541870117188, 507.2814025878906, 547.258544921875, 516.494384765625], "spans": [[14, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [136.7999267578125, 494.801513671875, 212.61610412597656, 504.0144958496094], "spans": [[15, 0]], "text": "Online resources", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [218.1934814453125, 494.801513671875, 547.258544921875, 504.0144958496094], "spans": [[15, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 127", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [136.7999267578125, 482.2618713378906, 200.54580688476562, 491.474853515625], "spans": [[16, 0]], "text": "Help from IBM", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [206.0924072265625, 482.2618713378906, 547.2077026367188, 491.474853515625], "spans": [[16, 1]], "text": ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
128", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [75.13301086425781, 558.7689819335938, 487.9241638183594, 590.6500244140625], "page": 10, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 3, "#-rows": 3, "data": [[{"bbox": [75.5999984741211, 581.1570434570312, 111.67109680175781, 589.4819946289062], "spans": [[0, 0]], "text": "AS/400fi", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [236.40029907226562, 581.1571044921875, 259.00469970703125, 589.4820556640625], "spans": [[0, 1]], "text": "IBMfi", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [397.2005920410156, 581.1571655273438, 445.6529541015625, 589.4821166992188], "spans": [[0, 2]], "text": "Redpaper\u2122", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [75.5999984741211, 570.1167602539062, 99.66960144042969, 578.4417114257812], "spans": [[1, 0]], "text": "DB2fi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [236.40029907226562, 570.1168212890625, 307.14569091796875, 578.4417724609375], "spans": [[1, 1]], "text": "Power Systems\u2122", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [397.2005920410156, 570.1168823242188, 455.17950439453125, 578.4418334960938], "spans": [[1, 2]], "text": "Redbooks (log o) fi System", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [75.5999984741211, 559.1367797851562, 107.3051986694336, 567.4617309570312], 
"spans": [[2, 0]], "text": "DRDAfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [236.40029907226562, 559.1368408203125, 283.47210693359375, 567.4617919921875], "spans": [[2, 1]], "text": "Redbooksfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [427.1544189453125, 559.1369018554688, 438.3072204589844, 567.4618530273438], "spans": [[2, 2]], "text": "ifi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [135.74322509765625, 350.148193359375, 545.6257934570312, 502.103515625], "page": 26, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2-1 FUNCTION_USAGE view", "type": "table", "#-cols": 3, "#-rows": 5, "data": [[{"bbox": [142.8000030517578, 487.1369934082031, 202.2449951171875, 495.4620056152344], "spans": [[0, 0]], "text": "Column name", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [216.8087921142578, 487.1369934082031, 257.210693359375, 495.4620056152344], "spans": [[0, 1]], "text": "Data type", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [289.47479248046875, 487.1369934082031, 338.8946838378906, 495.4620056152344], "spans": [[0, 2]], "text": "Description", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [142.8000030517578, 468.1172790527344, 203.2322998046875, 476.4422912597656], "spans": [[1, 0]], "text": "FUNCTION_ID", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [216.785400390625, 468.1172790527344, 
276.00360107421875, 476.4422912597656], "spans": [[1, 1]], "text": "VARCHAR(30)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [289.45770263671875, 468.1172790527344, 359.85394287109375, 476.4422912597656], "spans": [[1, 2]], "text": "ID of the function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [142.8000030517578, 449.156982421875, 198.66929626464844, 457.48199462890625], "spans": [[2, 0]], "text": "USER_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [216.74130249023438, 449.156982421875, 275.9234924316406, 457.48199462890625], "spans": [[2, 1]], "text": "VARCHAR(10)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [289.382080078125, 438.1166687011719, 515.0535888671875, 457.48199462890625], "spans": [[2, 2]], "text": "Name of the user profile that has a usage setting for this function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [142.79998779296875, 419.1563720703125, 173.98318481445312, 427.48138427734375], "spans": [[3, 0]], "text": "USAGE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [216.773681640625, 419.1563720703125, 270.9797668457031, 427.48138427734375], "spans": [[3, 1]], "text": "VARCHAR(7)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [289.416259765625, 397.13604736328125, 539.1071166992188, 427.48138427734375], "spans": [[3, 2]], "text": "Usage setting: GLYPH ALLOWED: The user profile is allowed to use the function. 
GLYPH DENIED: The user profile is not allowed to use the function.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [142.8000030517578, 378.1163330078125, 196.2248992919922, 386.44134521484375], "spans": [[4, 0]], "text": "USER_TYPE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [216.75210571289062, 378.1163330078125, 270.99871826171875, 386.44134521484375], "spans": [[4, 1]], "text": "VARCHAR(5)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [289.4316101074219, 356.15631103515625, 448.11962890625, 386.44134521484375], "spans": [[4, 2]], "text": "Type of user profile: GLYPH USER: The user profile is a user. GLYPH GROUP: The user profile is a group.", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [135.4901885986328, 202.02511596679688, 547.9369506835938, 295.5169372558594], "page": 26, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 2-1 Query to determine who has authority to define and manage RCAC", "type": "table", "#-cols": 2, "#-rows": 6, "data": [[{"bbox": [136.8000030517578, 279.56719970703125, 171.26956176757812, 288.34197998046875], "spans": [[0, 0]], "text": "SELECT", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [166.78244018554688, 267.5673828125, 251.6985321044922, 288.34197998046875], "spans": [[0, 1]], "text": "function_id, user_name,", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [170.75961303710938, 255.5675811767578, 221.6990203857422, 264.34234619140625], "spans": [[1, 1]], "text": "usage,", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": null, "spans": [[2, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [167.5380859375, 243.56777954101562, 236.6987762451172, 252.342529296875], "spans": [[2, 1]], "text": "user_type", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [136.8000030517578, 231.56797790527344, 160.59396362304688, 240.3427276611328], "spans": [[3, 0]], "text": "FROM", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [178.43943786621094, 231.56797790527344, 261.7182922363281, 240.3427276611328], "spans": [[3, 1]], "text": "function_usage", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [136.8000030517578, 219.56817626953125, 162.44175720214844, 228.34292602539062], "spans": [[4, 0]], "text": "WHERE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [177.82679748535156, 219.56817626953125, 331.67730712890625, 228.34292602539062], "spans": [[4, 1]], "text": "function_id='QIBM_DB_SECADM'", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [136.8000030517578, 207.56837463378906, 178.77542114257812, 216.34312438964844], "spans": [[5, 0]], "text": "ORDER BY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [189.269287109375, 
207.56837463378906, 241.73855590820312, 216.34312438964844], "spans": [[5, 1]], "text": "user_name;", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [63.51552963256836, 70.29814147949219, 547.5021362304688, 398.0616455078125], "page": 27, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 2-2 Comparison of the different function usage IDs and *JOBCTL authority", "type": "table", "#-cols": 5, "#-rows": 14, "data": [[{"bbox": [70.80030059814453, 383.1567077636719, 119.78550720214844, 391.4817199707031], "spans": [[0, 0]], "text": "User action", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [424.93804931640625, 304.9800109863281, 433.2629699707031, 344.4774475097656], "spans": [[0, 1]], "text": "*JOBCTL", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [450.1380615234375, 304.9800109863281, 458.4629821777344, 390.3999328613281], "spans": [[0, 2]], "text": "QIBM_DB_SECADM", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [475.9383544921875, 304.9800109863281, 484.2632751464844, 390.465576171875], "spans": [[0, 3]], "text": "QIBM_DB_SQLADM", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [501.13836669921875, 304.9799499511719, 534.7235717773438, 390.385498046875], "spans": [[0, 4]], "text": "QIBM_DB_SYSMON No Authority", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [70.80000305175781, 285.11700439453125, 220.1568145751953, 293.4420166015625], "spans": [[1, 0]], "text": "SET CURRENT DEGREE (SQL statement)", "type": "row_header", 
"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [429.0, 285.11700439453125, 435.00299072265625, 293.4420166015625], "spans": [[1, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [480.00030517578125, 285.11700439453125, 486.0032958984375, 293.4420166015625], "spans": [[1, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80001831054688, 266.1567077636719, 264.5538024902344, 274.4817199707031], "spans": [[2, 0]], "text": "CHGQRYA command targeting a different user's job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.0000305175781, 266.1567077636719, 435.0030212402344, 274.4817199707031], "spans": [[2, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [480.0003356933594, 266.1567077636719, 486.0033264160156, 274.4817199707031], "spans": [[2, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 
3]}], [{"bbox": [70.800048828125, 247.1370086669922, 322.5057373046875, 255.46202087402344], "spans": [[3, 0]], "text": "STRDBMON or ENDDBMON commands targeting a different user's job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.0000305175781, 247.1370086669922, 435.0030212402344, 255.46202087402344], "spans": [[3, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [480.0003356933594, 247.1370086669922, 486.0033264160156, 255.46202087402344], "spans": [[3, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.800048828125, 228.1173095703125, 381.0218505859375, 236.44232177734375], "spans": [[4, 0]], "text": "STRDBMON or ENDDBMON commands targeting a job that matches the current user", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [429.0000305175781, 228.1173095703125, 435.0030212402344, 236.44232177734375], "spans": [[4, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [480.0003356933594, 228.1173095703125, 511.26361083984375, 236.44232177734375], "spans": [[4, 3]], "text": "X X", "type": "body", "col": 3, 
"col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [530.7603149414062, 228.1173095703125, 536.7633056640625, 236.44232177734375], "spans": [[4, 4]], "text": "X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.800048828125, 209.15701293945312, 359.5173645019531, 217.48202514648438], "spans": [[5, 0]], "text": "QUSRJOBI() API format 900 or System i Navigator's SQL Details for Job", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [429.00006103515625, 209.15701293945312, 435.0030517578125, 217.48202514648438], "spans": [[5, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [480.0003662109375, 209.15701293945312, 486.00335693359375, 217.48202514648438], "spans": [[5, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [505.26068115234375, 209.15701293945312, 511.263671875, 217.48202514648438], "spans": [[5, 4]], "text": "X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80007934570312, 190.13731384277344, 220.7517852783203, 198.4623260498047], "spans": [[6, 0]], "text": "Visual Explain within Run SQL scripts", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [429.00006103515625, 190.13731384277344, 435.0030517578125, 198.4623260498047], "spans": [[6, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 
2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [480.0003662109375, 190.13731384277344, 486.00335693359375, 198.4623260498047], "spans": [[6, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [505.26068115234375, 190.13731384277344, 536.7633666992188, 198.4623260498047], "spans": [[6, 4]], "text": "X X", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80007934570312, 171.11761474609375, 236.65480041503906, 179.442626953125], "spans": [[7, 0]], "text": "Visual Explain outside of Run SQL scripts", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [429.00006103515625, 171.11761474609375, 435.0030517578125, 179.442626953125], "spans": [[7, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [480.0003662109375, 171.11761474609375, 486.00335693359375, 179.442626953125], "spans": [[7, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80007934570312, 152.15731811523438, 213.1296844482422, 160.48233032226562], "spans": [[8, 0]], "text": "ANALYZE PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": 
false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [429.00006103515625, 152.15731811523438, 435.0030517578125, 160.48233032226562], "spans": [[8, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [480.0003662109375, 152.15731811523438, 486.00335693359375, 160.48233032226562], "spans": [[8, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80007934570312, 133.1376190185547, 199.87808227539062, 141.46263122558594], "spans": [[9, 0]], "text": "DUMP PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [429.00006103515625, 133.1376190185547, 435.0030517578125, 141.46263122558594], "spans": [[9, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [480.0003662109375, 133.1376190185547, 486.00335693359375, 141.46263122558594], "spans": [[9, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": 
[70.80007934570312, 114.11792755126953, 208.36776733398438, 122.44291687011719], "spans": [[10, 0]], "text": "MODIFY PLAN CACHE procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [429.00006103515625, 114.11792755126953, 435.0030517578125, 122.44291687011719], "spans": [[10, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [480.0003662109375, 114.11792755126953, 486.00335693359375, 122.44291687011719], "spans": [[10, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [70.80007934570312, 95.09822845458984, 411.20263671875, 103.42323303222656], "spans": [[11, 0]], "text": "MODIFY PLAN CACHE PROPERTIES procedure (currently does not check authority)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 4]], "text": "", "type": "body", "col": 4, 
"col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": null, "spans": [[12, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [429.00006103515625, 95.09822845458984, 435.0030517578125, 103.42323303222656], "spans": [[12, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [480.0003662109375, 95.09822845458984, 486.00335693359375, 103.42323303222656], "spans": [[12, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [70.80007934570312, 76.13793182373047, 377.1258544921875, 84.46292877197266], "spans": [[13, 0]], "text": "CHANGE PLAN CACHE SIZE procedure (currently does not check authority)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [429.00006103515625, 76.13793182373047, 435.0030517578125, 84.46292877197266], "spans": [[13, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [480.0003662109375, 76.13793182373047, 486.00335693359375, 84.46292877197266], "spans": [[13, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, 
"col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [63.80680847167969, 222.05039978027344, 547.7899169921875, 720.9105224609375], "page": 28, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 6, "#-rows": 22, "data": [[{"bbox": [70.80136108398438, 706.1392822265625, 119.78656768798828, 714.4642333984375], "spans": [[0, 0]], "text": "User action", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [424.93804931640625, 628.02001953125, 433.262939453125, 667.4706420898438], "spans": [[0, 1]], "text": "*JOBCTL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [450.1380615234375, 628.02001953125, 458.46295166015625, 713.3759765625], "spans": [[0, 2]], "text": "QIBM_DB_SECADM", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [475.9383544921875, 628.02001953125, 484.26324462890625, 713.3759765625], "spans": [[0, 3]], "text": "QIBM_DB_SQLADM", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [501.13836669921875, 628.02001953125, 509.4632568359375, 713.437255859375], "spans": [[0, 4]], "text": "QIBM_DB_SYSMON", "type": "col_header", "col": 4, "col-header": false, "col-span": [4, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [526.3987426757812, 628.02001953125, 534.7235107421875, 682.131591796875], "spans": [[0, 5]], "text": "No Authority", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 6], "row": 0, "row-header": false, 
"row-span": [0, 1]}], [{"bbox": [70.80060577392578, 608.1573486328125, 278.5827331542969, 616.4822998046875], "spans": [[1, 0]], "text": "START PLAN CACHE EVENT MONITOR procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [429.0005798339844, 608.1573486328125, 435.0035705566406, 616.4822998046875], "spans": [[1, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [480.0008850097656, 608.1573486328125, 486.0038757324219, 616.4822998046875], "spans": [[1, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80059814453125, 589.1376342773438, 269.4494934082031, 597.4625854492188], "spans": [[2, 0]], "text": "END PLAN CACHE EVENT MONITOR procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [429.0005798339844, 589.1376342773438, 435.0035705566406, 597.4625854492188], "spans": [[2, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [480.0008850097656, 
589.1376342773438, 486.0038757324219, 597.4625854492188], "spans": [[2, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": null, "spans": [[2, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [70.80059814453125, 570.117919921875, 293.976318359375, 578.44287109375], "spans": [[3, 0]], "text": "END ALL PLAN CACHE EVENT MONITORS procedure", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [429.0005798339844, 570.117919921875, 435.0035705566406, 578.44287109375], "spans": [[3, 1]], "text": "X", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [480.0008850097656, 570.117919921875, 486.0038757324219, 578.44287109375], "spans": [[3, 3]], "text": "X", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": null, "spans": [[3, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.80059814453125, 551.1575927734375, 311.2257385253906, 559.4825439453125], "spans": [[4, 0]], "text": "Work with RCAC row permissions (Create, modify, or delete)", "type": "row_header", "col": 0, 
"col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [454.50030517578125, 551.1575927734375, 460.5032958984375, 559.4825439453125], "spans": [[4, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": null, "spans": [[4, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.80059814453125, 532.1378784179688, 303.5882873535156, 540.4628295898438], "spans": [[5, 0]], "text": "Work with RCAC column masks (Create, modify, or delete)", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [454.50030517578125, 532.1378784179688, 460.5032958984375, 540.4628295898438], "spans": [[5, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": 
false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80059814453125, 513.1181640625, 264.57958984375, 521.443115234375], "spans": [[6, 0]], "text": "Change Object Owner ( CHGOBJOWN ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [454.50030517578125, 513.1181640625, 460.5032958984375, 521.443115234375], "spans": [[6, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": null, "spans": [[6, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80059814453125, 494.1578369140625, 299.39697265625, 502.48284912109375], "spans": [[7, 0]], "text": "Change Object Primary Group ( CHGOBJPGP ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [454.50030517578125, 494.1578369140625, 460.5032958984375, 502.48284912109375], "spans": [[7, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, 
"col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": null, "spans": [[7, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80059814453125, 475.13812255859375, 266.843994140625, 483.463134765625], "spans": [[8, 0]], "text": "Grant Object Authority ( GRTOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [454.5002746582031, 475.13812255859375, 460.5032653808594, 483.463134765625], "spans": [[8, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": null, "spans": [[8, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80056762695312, 456.118408203125, 271.78857421875, 464.44342041015625], "spans": [[9, 0]], "text": "Revoke Object Authority ( RVKOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], 
"row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [454.500244140625, 456.118408203125, 460.50323486328125, 464.44342041015625], "spans": [[9, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": null, "spans": [[9, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [70.800537109375, 437.1581115722656, 257.3543395996094, 445.4831237792969], "spans": [[10, 0]], "text": "Edit Object Authority ( EDTOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [454.500244140625, 437.1581115722656, 460.50323486328125, 445.4831237792969], "spans": [[10, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": null, "spans": [[10, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 10, "row-header": false, "row-span": [10, 11]}, 
{"bbox": null, "spans": [[10, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 10, "row-header": false, "row-span": [10, 11]}], [{"bbox": [70.800537109375, 418.1383972167969, 271.1882629394531, 426.4634094238281], "spans": [[11, 0]], "text": "Display Object Authority ( DSPOBJAUT ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": [454.500244140625, 418.1383972167969, 460.50323486328125, 426.4634094238281], "spans": [[11, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 11, "row-header": false, "row-span": [11, 12]}, {"bbox": null, "spans": [[11, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 11, "row-header": false, "row-span": [11, 12]}], [{"bbox": [70.800537109375, 399.1186828613281, 237.0242462158203, 407.4436950683594], "spans": [[12, 0]], "text": "Work with Objects ( WRKOBJ ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": [454.500244140625, 399.1186828613281, 460.50323486328125, 407.4436950683594], "spans": [[12, 2]], "text": "X", "type": "body", "col": 2, "col-header": 
false, "col-span": [2, 3], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 12, "row-header": false, "row-span": [12, 13]}, {"bbox": null, "spans": [[12, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 12, "row-header": false, "row-span": [12, 13]}], [{"bbox": [70.800537109375, 380.15838623046875, 238.51824951171875, 388.4833984375], "spans": [[13, 0]], "text": "Work with Libraries ( WRKLIB ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": [454.5011291503906, 380.15838623046875, 460.5041198730469, 388.4833984375], "spans": [[13, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 13, "row-header": false, "row-span": [13, 14]}, {"bbox": null, "spans": [[13, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 13, "row-header": false, "row-span": [13, 14]}], [{"bbox": [70.80142211914062, 361.138671875, 284.7251281738281, 369.46368408203125], "spans": [[14, 0]], "text": "Add Authorization List Entry ( ADDAUTLE ) CL command", "type": "row_header", "col": 0, 
"col-header": false, "col-span": [0, 1], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": [454.5010986328125, 361.138671875, 460.50408935546875, 369.46368408203125], "spans": [[14, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 14, "row-header": false, "row-span": [14, 15]}, {"bbox": null, "spans": [[14, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 14, "row-header": false, "row-span": [14, 15]}], [{"bbox": [70.8013916015625, 342.11895751953125, 297.70037841796875, 350.4439697265625], "spans": [[15, 0]], "text": "Change Authorization List Entry ( CHGAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": [454.5010986328125, 342.11895751953125, 460.50408935546875, 350.4439697265625], "spans": [[15, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 4]], "text": "", "type": "body", "col": 4, "col-header": false, 
"col-span": [4, 5], "row": 15, "row-header": false, "row-span": [15, 16]}, {"bbox": null, "spans": [[15, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 15, "row-header": false, "row-span": [15, 16]}], [{"bbox": [70.8013916015625, 323.1586608886719, 299.32037353515625, 331.4836730957031], "spans": [[16, 0]], "text": "Remove Authorization List Entry ( RMVAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": [454.5010986328125, 323.1586608886719, 460.50408935546875, 331.4836730957031], "spans": [[16, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 16, "row-header": false, "row-span": [16, 17]}, {"bbox": null, "spans": [[16, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 16, "row-header": false, "row-span": [16, 17]}], [{"bbox": [70.8013916015625, 304.1389465332031, 299.32037353515625, 312.4639587402344], "spans": [[17, 0]], "text": "Retrieve Authorization List Entry ( RTVAUTLE ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": [454.5010986328125, 304.1389465332031, 
460.50408935546875, 312.4639587402344], "spans": [[17, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 17, "row-header": false, "row-span": [17, 18]}, {"bbox": null, "spans": [[17, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 17, "row-header": false, "row-span": [17, 18]}], [{"bbox": [70.8013916015625, 285.1192321777344, 269.78509521484375, 293.4442443847656], "spans": [[18, 0]], "text": "Display Authorization List ( DSPAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": [454.5010986328125, 285.1192321777344, 460.50408935546875, 293.4442443847656], "spans": [[18, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 18, "row-header": false, "row-span": [18, 19]}, {"bbox": null, "spans": [[18, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 18, "row-header": false, "row-span": [18, 19]}], [{"bbox": [70.8013916015625, 266.158935546875, 313.63848876953125, 
274.48394775390625], "spans": [[19, 0]], "text": "Display Authorization List Objects ( DSPAUTLOBJ ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": [454.5010986328125, 266.158935546875, 460.50408935546875, 274.48394775390625], "spans": [[19, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 19, "row-header": false, "row-span": [19, 20]}, {"bbox": null, "spans": [[19, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 19, "row-header": false, "row-span": [19, 20]}], [{"bbox": [70.8013916015625, 247.1392364501953, 253.48878479003906, 255.46424865722656], "spans": [[20, 0]], "text": "Edit Authorization List ( EDTAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": [454.5010681152344, 247.1392364501953, 460.5040588378906, 255.46424865722656], "spans": [[20, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 20, 
"row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 20, "row-header": false, "row-span": [20, 21]}, {"bbox": null, "spans": [[20, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 20, "row-header": false, "row-span": [20, 21]}], [{"bbox": [70.80136108398438, 228.11953735351562, 281.80908203125, 236.44454956054688], "spans": [[21, 0]], "text": "Work with Authorization Lists ( WRKAUTL ) CL command", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": [454.5010681152344, 228.11953735351562, 460.5040588378906, 236.44454956054688], "spans": [[21, 2]], "text": "X", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 3]], "text": "", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 4]], "text": "", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 21, "row-header": false, "row-span": [21, 22]}, {"bbox": null, "spans": [[21, 5]], "text": "", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 21, "row-header": false, "row-span": [21, 22]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [135.69085693359375, 588.1738891601562, 542.3914184570312, 687.73828125], "page": 35, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3-1 Special registers and their corresponding values", "type": "table", "#-cols": 2, "#-rows": 4, "data": [[{"bbox": [142.8000030517578, 673.1370239257812, 209.67091369628906, 681.4619750976562], 
"spans": [[0, 0]], "text": "Special register", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [230.18911743164062, 673.1370239257812, 319.9352722167969, 681.4619750976562], "spans": [[0, 1]], "text": "Corresponding value", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [142.80001831054688, 643.1364135742188, 212.7012176513672, 662.5016479492188], "spans": [[1, 0]], "text": "USER or SESSION_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [230.2197265625, 654.1766967773438, 467.9906921386719, 662.5016479492188], "spans": [[1, 1]], "text": "The effective user of the thread excluding adopted authority.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [142.80003356933594, 624.11669921875, 216.63963317871094, 632.441650390625], "spans": [[2, 0]], "text": "CURRENT_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [230.19813537597656, 613.13671875, 535.6508178710938, 632.441650390625], "spans": [[2, 1]], "text": "The effective user of the thread including adopted authority. 
When no adopted authority is present, this has the same value as USER.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [142.8009033203125, 594.1170043945312, 209.73570251464844, 602.4419555664062], "spans": [[3, 0]], "text": "SYSTEM_USER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [230.24490356445312, 594.1170043945312, 425.64569091796875, 602.4419555664062], "spans": [[3, 1]], "text": "The authorization ID that initiated the connection.", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [63.562129974365234, 496.5521545410156, 548.4708862304688, 687.6392822265625], "page": 36, "span": [0, 0], "__ref_s3_data": null}], "text": "Table 3-2 Built-in global variables", "type": "table", "#-cols": 3, "#-rows": 10, "data": [[{"bbox": [70.80000305175781, 673.1370239257812, 134.99070739746094, 681.4619750976562], "spans": [[0, 0]], "text": "Global variable", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [202.889404296875, 673.1370239257812, 223.34640502929688, 681.4619750976562], "spans": [[0, 1]], "text": "Type", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [281.8247985839844, 673.1370239257812, 331.3428039550781, 681.4619750976562], "spans": [[0, 2]], "text": "Description", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [70.80000305175781, 654.1766967773438, 132.7209014892578, 662.5016479492188], "spans": [[1, 0]], "text": "CLIENT_HOST", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, 
"row-header": false, "row-span": [1, 2]}, {"bbox": [202.89028930664062, 654.1766967773438, 267.0765075683594, 662.5016479492188], "spans": [[1, 1]], "text": "VARCHAR(255)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [281.8473205566406, 654.1766967773438, 510.17547607421875, 662.5016479492188], "spans": [[1, 2]], "text": "Host name of the current client as returned by the system", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [70.80001831054688, 635.156982421875, 140.66522216796875, 643.48193359375], "spans": [[2, 0]], "text": "CLIENT_IPADDR", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [202.872314453125, 635.156982421875, 267.077392578125, 643.48193359375], "spans": [[2, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [281.8454895019531, 635.156982421875, 509.6058349609375, 643.48193359375], "spans": [[2, 2]], "text": "IP address of the current client as returned by the system", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [70.80001831054688, 616.1372680664062, 134.98263549804688, 624.4622192382812], "spans": [[3, 0]], "text": "CLIENT_PORT", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [202.90293884277344, 616.1372680664062, 242.80084228515625, 624.4622192382812], "spans": [[3, 1]], "text": "INTEGER", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [281.7978515625, 616.1372680664062, 527.5922241210938, 624.4622192382812], "spans": [[3, 2]], "text": "Port used by the 
current client to communicate with the server", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [70.80001831054688, 597.1175537109375, 143.50924682617188, 605.4425048828125], "spans": [[4, 0]], "text": "PACKAGE_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [202.80575561523438, 597.1175537109375, 267.0693664550781, 605.4425048828125], "spans": [[4, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [281.85186767578125, 597.1175537109375, 436.5726013183594, 605.4425048828125], "spans": [[4, 2]], "text": "Name of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [70.80001831054688, 578.1572265625, 156.01654052734375, 586.482177734375], "spans": [[5, 0]], "text": "PACKAGE_SCHEMA", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [202.83544921875, 578.1572265625, 267.0864562988281, 586.482177734375], "spans": [[5, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [281.8707580566406, 578.1572265625, 470.44677734375, 586.482177734375], "spans": [[5, 2]], "text": "Schema name of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [70.80001831054688, 559.1375122070312, 157.89932250976562, 567.4624633789062], "spans": [[6, 0]], "text": "PACKAGE_VERSION", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [202.72471618652344, 
559.1375122070312, 261.9825439453125, 567.4624633789062], "spans": [[6, 1]], "text": "VARCHAR(64)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [281.7492370605469, 559.1375122070312, 478.84381103515625, 567.4624633789062], "spans": [[6, 2]], "text": "Version identifier of the currently running package", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [70.80001831054688, 540.1177978515625, 154.419921875, 548.4427490234375], "spans": [[7, 0]], "text": "ROUTINE_SCHEMA", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [202.79312133789062, 540.1177978515625, 267.0927429199219, 548.4427490234375], "spans": [[7, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [281.87164306640625, 540.1177978515625, 464.2602233886719, 548.4427490234375], "spans": [[7, 2]], "text": "Schema name of the currently running routine", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [70.80001831054688, 521.157470703125, 188.43991088867188, 529.482421875], "spans": [[8, 0]], "text": "ROUTINE_SPECIFIC_NAME", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [202.8444061279297, 521.157470703125, 267.03692626953125, 529.482421875], "spans": [[8, 1]], "text": "VARCHAR(128)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [281.80682373046875, 521.157470703125, 430.40045166015625, 529.482421875], "spans": [[8, 2]], "text": "Name of the currently running routine", "type": "body", "col": 2, "col-header": false, 
"col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [70.80003356933594, 502.1377258300781, 139.4313507080078, 510.4627380371094], "spans": [[9, 0]], "text": "ROUTINE_TYPE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [202.74635314941406, 502.1377258300781, 239.2899627685547, 510.4627380371094], "spans": [[9, 1]], "text": "CHAR(1)", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [281.7906494140625, 502.1377258300781, 425.09130859375, 510.4627380371094], "spans": [[9, 2]], "text": "Type of the currently running routine", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [135.90414428710938, 86.54425811767578, 548.0491333007812, 132.3650665283203], "page": 37, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 3-1 Subquery that is used as part of the rule", "type": "table", "#-cols": 2, "#-rows": 2, "data": [[{"bbox": [136.8000030517578, 116.547119140625, 316.67755126953125, 125.3218765258789], "spans": [[0, 0]], "text": "CURRENT_DATE IN (SELECT D.DATE_KEY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [222.19522094726562, 92.54750061035156, 371.6368408203125, 113.32206726074219], "spans": [[0, 1]], "text": "DATE_MASTER D D.BUSINESS_DAY = 'Y')", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [172.38134765625, 92.54750061035156, 209.87899780273438, 113.32206726074219], "spans": [[1, 0]], "text": "FROM WHERE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": null, "spans": [[1, 1]], "text": "", "type": 
"body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [136.10394287109375, 62.45356750488281, 416.6361083984375, 98.12852478027344], "page": 42, "span": [0, 0], "__ref_s3_data": null}], "text": "Example 3-8 Creation of a mask on the DATE_OF_BIRTH column", "type": "table", "#-cols": 2, "#-rows": 3, "data": [[{"bbox": [136.8000030517578, 87.80712127685547, 193.80364990234375, 96.58187866210938], "spans": [[0, 0]], "text": "CREATE MASK", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [204.16795349121094, 87.80712127685547, 416.6361083984375, 96.58187866210938], "spans": [[0, 1]], "text": "HR_SCHEMA.MASK_DATE_OF_BIRTH_ON_EMPLOYEES", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [136.8000030517578, 75.80731201171875, 148.79383850097656, 84.58206939697266], "spans": [[1, 0]], "text": "ON", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [178.77841186523438, 75.80731201171875, 376.6766052246094, 84.58206939697266], "spans": [[1, 1]], "text": "HR_SCHEMA.EMPLOYEES AS EMPLOYEES", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [136.8000030517578, 63.80750274658203, 192.76722717285156, 72.58226013183594], "spans": [[2, 0]], "text": "FOR COLUMN", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [203.96066284179688, 63.80750274658203, 276.7180480957031, 72.58226013183594], "spans": [[2, 1]], "text": "DATE_OF_BIRTH", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}]], "model": null, "bounding-box": 
null}, {"prov": [{"bbox": [136.4228515625, 386.3492126464844, 529.3878173828125, 671.8422241210938], "page": 56, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 4-2 Rules for row and column access", "type": "table", "#-cols": 7, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [224.1096954345703, 653.607177734375, 293.8129577636719, 665.3426513671875], "spans": [[0, 1], [0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [224.1096954345703, 653.607177734375, 293.8129577636719, 665.3426513671875], "spans": [[0, 1], [0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [334.76220703125, 653.607177734375, 396.792724609375, 665.3426513671875], "spans": [[0, 3], [0, 4]], "text": "ACCOUNTS", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [334.76220703125, 653.607177734375, 396.792724609375, 665.3426513671875], "spans": [[0, 3], [0, 4]], "text": "ACCOUNTS", "type": "col_header", "col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [426.6241455078125, 653.607177734375, 513.3307495117188, 665.3426513671875], "spans": [[0, 5], [0, 6]], "text": "TRANSACTIONS", "type": "col_header", "col": 5, "col-header": false, "col-span": [5, 7], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [426.6241455078125, 653.607177734375, 513.3307495117188, 665.3426513671875], "spans": [[0, 5], [0, 6]], "text": "TRANSACTIONS", "type": "col_header", "col": 6, "col-header": false, "col-span": [5, 7], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": 
[150.43690490722656, 597.8072509765625, 194.79078674316406, 607.5770874023438], "spans": [[1, 0]], "text": "SECURITY", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [213.2664031982422, 598.4434814453125, 250.56985473632812, 607.2554321289062], "spans": [[1, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [277.6850280761719, 598.4434814453125, 291.3363037109375, 607.2554321289062], "spans": [[1, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [318.52044677734375, 598.4434814453125, 355.8239440917969, 607.2554321289062], "spans": [[1, 3]], "text": "No Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [382.9391174316406, 598.4434814453125, 396.59039306640625, 607.2554321289062], "spans": [[1, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [423.7405700683594, 598.4434814453125, 461.0440673828125, 607.2554321289062], "spans": [[1, 5]], "text": "No Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [488.9488220214844, 598.4434814453125, 501.11669921875, 607.2554321289062], "spans": [[1, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [175.966796875, 559.4096069335938, 194.7865753173828, 569.179443359375], "spans": [[2, 0]], "text": "DBE", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [213.78109741210938, 560.1143798828125, 
250.2053985595703, 568.9263305664062], "spans": [[2, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [277.6911926269531, 560.1143798828125, 291.5215759277344, 568.9263305664062], "spans": [[2, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [319.04132080078125, 560.1143798828125, 355.4615173339844, 568.9263305664062], "spans": [[2, 3]], "text": "All Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [382.9472961425781, 560.1143798828125, 396.7776794433594, 568.9263305664062], "spans": [[2, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [424.2634582519531, 560.1143798828125, 460.68365478515625, 568.9263305664062], "spans": [[2, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [488.9579772949219, 560.1143798828125, 501.30291748046875, 568.9263305664062], "spans": [[2, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [161.10870361328125, 520.9776000976562, 194.82318115234375, 530.7474365234375], "spans": [[3, 0]], "text": "ADMIN", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [213.78109741210938, 521.785400390625, 250.2053985595703, 530.5973510742188], "spans": [[3, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [278.44573974609375, 521.785400390625, 290.7906494140625, 530.5973510742188], 
"spans": [[3, 2]], "text": "No", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [319.04437255859375, 521.785400390625, 355.46868896484375, 530.5973510742188], "spans": [[3, 3]], "text": "All Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [383.70904541015625, 521.785400390625, 396.053955078125, 530.5973510742188], "spans": [[3, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [424.2737121582031, 521.785400390625, 460.6980285644531, 530.5973510742188], "spans": [[3, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [488.97235107421875, 521.785400390625, 501.3172607421875, 530.5973510742188], "spans": [[3, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [162.24099731445312, 482.5798645019531, 194.78195190429688, 492.3497009277344], "spans": [[4, 0]], "text": "TELLER", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [213.78109741210938, 483.4906005859375, 250.2053985595703, 492.3025207519531], "spans": [[4, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [277.6911926269531, 483.4906005859375, 291.5215759277344, 492.3025207519531], "spans": [[4, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [319.04132080078125, 483.4906005859375, 355.4615173339844, 492.3025207519531], "spans": [[4, 3]], "text": "All Rows", "type": 
"body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [383.7018737792969, 483.4906005859375, 396.04681396484375, 492.3025207519531], "spans": [[4, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [424.2665710449219, 483.4906005859375, 460.6908874511719, 492.3025207519531], "spans": [[4, 5]], "text": "All Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [488.9652099609375, 483.4906005859375, 501.31011962890625, 492.3025207519531], "spans": [[4, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [141.78970336914062, 444.18218994140625, 194.802734375, 453.9520263671875], "spans": [[5, 0]], "text": "CUSTOMER", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [220.57534790039062, 438.9849548339844, 244.4169464111328, 460.1500244140625], "spans": [[5, 1]], "text": "Own Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [278.4293212890625, 445.1615295410156, 290.7783508300781, 453.97344970703125], "spans": [[5, 2]], "text": "No", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [325.81707763671875, 438.9849548339844, 349.65869140625, 460.1500244140625], "spans": [[5, 3]], "text": "Own Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [383.6710510253906, 445.1615295410156, 396.02008056640625, 453.97344970703125], "spans": [[5, 4]], "text": "No", "type": "body", "col": 4, "col-header": false, "col-span": 
[4, 5], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [431.02484130859375, 438.9849548339844, 454.866455078125, 460.1500244140625], "spans": [[5, 5]], "text": "Own Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [488.91278076171875, 445.1615295410156, 501.2618103027344, 453.97344970703125], "spans": [[5, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [161.55479431152344, 405.78448486328125, 194.79734802246094, 415.5543212890625], "spans": [[6, 0]], "text": "PUBLIC", "type": "row_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [213.2664031982422, 406.8324890136719, 250.56985473632812, 415.6444091796875], "spans": [[6, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [277.6850280761719, 406.8324890136719, 291.3363037109375, 415.6444091796875], "spans": [[6, 2]], "text": "Yes", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [318.52044677734375, 406.8324890136719, 355.8239440917969, 415.6444091796875], "spans": [[6, 3]], "text": "No Rows", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [382.9391174316406, 406.8324890136719, 396.59039306640625, 415.6444091796875], "spans": [[6, 4]], "text": "Yes", "type": "body", "col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [423.7405700683594, 406.8324890136719, 461.0440673828125, 415.6444091796875], "spans": [[6, 5]], "text": "No Rows", "type": "body", "col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, 
"row-span": [6, 7]}, {"bbox": [488.9488220214844, 406.8324890136719, 501.11669921875, 415.6444091796875], "spans": [[6, 6]], "text": "No", "type": "body", "col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [136.258056640625, 393.7317199707031, 529.4730224609375, 684.1337280273438], "page": 57, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 4-3 Column masks", "type": "table", "#-cols": 4, "#-rows": 7, "data": [[{"bbox": null, "spans": [[0, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": null, "spans": [[0, 1]], "text": "", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [287.8096923828125, 668.81640625, 355.4244689941406, 680.1975708007812], "spans": [[0, 2]], "text": "CUSTOMERS", "type": "col_header", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [428.24420166015625, 668.81640625, 488.26617431640625, 680.1975708007812], "spans": [[0, 3]], "text": "ACCOUNTS", "type": "col_header", "col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [150.03750610351562, 608.87744140625, 193.05224609375, 618.352294921875], "spans": [[1, 0]], "text": "SECURITY", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [212.63400268554688, 607.9304809570312, 248.9210205078125, 616.476318359375], "spans": [[1, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [263.1838073730469, 589.69384765625, 382.3654479980469, 635.3184814453125], "spans": [[1, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER 
CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [427.81256103515625, 609.6608276367188, 482.86053466796875, 615.3514404296875], "spans": [[1, 3]], "text": "ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [174.79660034179688, 555.6320190429688, 193.04815673828125, 565.1068725585938], "spans": [[2, 0]], "text": "DBE", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [213.1331024169922, 556.0161743164062, 248.37786865234375, 564.56201171875], "spans": [[2, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [263.1838073730469, 537.7796020507812, 382.3654479980469, 583.404296875], "spans": [[2, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [427.8116149902344, 557.7466430664062, 482.92327880859375, 563.437255859375], "spans": [[2, 3]], "text": "ACCOUNT NUMBER ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [160.38710021972656, 521.521728515625, 193.08364868164062, 530.99658203125], "spans": [[3, 0]], "text": "ADMIN", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [213.1331024169922, 520.4083862304688, 248.45372009277344, 528.9542236328125], "spans": [[3, 1]], "text": "All Rows", "type": "body", 
"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [315.5306091308594, 522.1387329101562, 330.12255859375, 527.829345703125], "spans": [[3, 2]], "text": "None", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [448.0835266113281, 522.1387329101562, 462.67547607421875, 527.829345703125], "spans": [[3, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [161.48519897460938, 489.0753479003906, 193.04367065429688, 498.5502014160156], "spans": [[4, 0]], "text": "TELLER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [213.1331024169922, 488.7939453125, 248.45372009277344, 497.3398132324219], "spans": [[4, 1]], "text": "All Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [263.1838073730469, 474.550537109375, 382.3654479980469, 512.1885375976562], "spans": [[4, 2]], "text": "CUSTOMER_EMAIL CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER TAX ID _ _", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [448.07916259765625, 490.52423095703125, 462.6711120605469, 496.2148742675781], "spans": [[4, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [141.65139770507812, 457.7271423339844, 193.06382751464844, 467.2019958496094], "spans": [[5, 0]], "text": "CUSTOMER", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [208.84030151367188, 457.179443359375, 252.7267608642578, 465.7253112792969], 
"spans": [[5, 1]], "text": "Own Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [315.5306091308594, 458.9099426269531, 330.12255859375, 464.6005859375], "spans": [[5, 2]], "text": "None", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [448.0835266113281, 458.9099426269531, 462.67547607421875, 464.6005859375], "spans": [[5, 3]], "text": "None", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [160.8197021484375, 422.5186462402344, 193.05859375, 431.9934997558594], "spans": [[6, 0]], "text": "PUBLIC", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [212.63400268554688, 421.5716247558594, 248.9210205078125, 430.11749267578125], "spans": [[6, 1]], "text": "No Rows", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [263.18353271484375, 403.33502197265625, 382.3654479980469, 448.9596862792969], "spans": [[6, 2]], "text": "CUSTOMER_DRIVERS_LICENSE_NUMBER CUSTOMER_EMAIL CUSTOMER LOGIN ID CUSTOMER_LOGIN_ID CUSTOMER_SECURITY_QUESTION CUSTOMER_SECURITY_QUESTION_ANSWER CUSTOMER_TAX_ID", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [427.81256103515625, 423.3021240234375, 482.86053466796875, 428.9927673339844], "spans": [[6, 3]], "text": "ACCOUNT_NUMBER", "type": "body", "col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [142.8543243408203, 328.035400390625, 299.9855041503906, 479.80316162109375], "page": 102, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 
2, "#-rows": 11, "data": [[{"bbox": [149.45120239257812, 467.8338623046875, 233.62379455566406, 474.97100830078125], "spans": [[0, 0]], "text": "CREDIT CARD NUMBER _ _", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [257.74920654296875, 467.8338623046875, 279.168212890625, 474.97100830078125], "spans": [[0, 1]], "text": "TOTAL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [148.50592041015625, 454.30743408203125, 221.83938598632812, 461.4362487792969], "spans": [[1, 0]], "text": "3785 0000 0000 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [272.42230224609375, 454.30743408203125, 295.6497497558594, 461.4362487792969], "spans": [[1, 1]], "text": "233.50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [148.50592041015625, 440.8002014160156, 221.83853149414062, 447.92901611328125], "spans": [[2, 0]], "text": "3785 1111 1111 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [272.42144775390625, 440.8002014160156, 295.6488952636719, 447.92901611328125], "spans": [[2, 1]], "text": "105.10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [148.50619506835938, 427.264892578125, 221.84132385253906, 434.3937072753906], "spans": [[3, 0]], "text": "3785 2222 2222 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [272.4057922363281, 427.264892578125, 295.6465759277344, 434.3937072753906], "spans": [[3, 1]], "text": "300 00 300.00", "type": "body", "col": 1, "col-header": false, 
"col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [148.50619506835938, 413.7298889160156, 221.85214233398438, 420.85870361328125], "spans": [[4, 0]], "text": "3785 3333 3333 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [266.1250305175781, 413.7298889160156, 295.6675109863281, 420.85870361328125], "spans": [[4, 1]], "text": "1,775.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [148.50619506835938, 400.22265625, 221.83880615234375, 407.3514709472656], "spans": [[5, 0]], "text": "5466 4444 4444 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [272.4217529296875, 400.22265625, 295.6492004394531, 407.3514709472656], "spans": [[5, 1]], "text": "601.70", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [148.50619506835938, 386.6877136230469, 221.83880615234375, 393.8165283203125], "spans": [[6, 0]], "text": "5466 5555 5555 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [276.646484375, 386.6877136230469, 295.6483154296875, 393.8165283203125], "spans": [[6, 1]], "text": "37.80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [148.50619506835938, 373.1529541015625, 221.83880615234375, 380.2817687988281], "spans": [[7, 0]], "text": "5466 6666 6666 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [272.4217529296875, 373.1529541015625, 295.6492004394531, 380.2817687988281], "spans": [[7, 1]], "text": "490.45", "type": "body", "col": 1, "col-header": 
false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [148.50619506835938, 359.6453857421875, 221.84132385253906, 366.7742004394531], "spans": [[8, 0]], "text": "6011 7777 7777 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [268.1813049316406, 359.6453857421875, 295.6460266113281, 366.7742004394531], "spans": [[8, 1]], "text": "1005.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [148.50619506835938, 346.11041259765625, 221.83880615234375, 353.2392272949219], "spans": [[9, 0]], "text": "6011 8888 8888 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [272.4217529296875, 346.11041259765625, 295.6492004394531, 353.2392272949219], "spans": [[9, 1]], "text": "750.33", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [148.50619506835938, 332.5756530761719, 221.83880615234375, 339.7044677734375], "spans": [[10, 0]], "text": "6011 9999 9999 0001", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [276.646484375, 332.5756530761719, 295.6483154296875, 339.7044677734375], "spans": [[10, 1]], "text": "10.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null, "bounding-box": null}, {"prov": [{"bbox": [313.2283020019531, 328.81744384765625, 469.1299743652344, 479.4169006347656], "page": 102, "span": [0, 0], "__ref_s3_data": null}], "text": "Figure 6-1 Timing of column masking", "type": "table", "#-cols": 2, "#-rows": 11, "data": [[{"bbox": [318.9862060546875, 467.8338623046875, 403.1588134765625, 474.97100830078125], "spans": [[0, 0]], 
"text": "CREDIT CARD NUMBER _ _", "type": "col_header", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [427.28424072265625, 467.8338623046875, 448.7032470703125, 474.97100830078125], "spans": [[0, 1]], "text": "TOTAL", "type": "col_header", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": [318.041015625, 454.30743408203125, 390.6849365234375, 461.4362487792969], "spans": [[1, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [441.9682312011719, 454.30743408203125, 465.16064453125, 461.4362487792969], "spans": [[1, 1]], "text": "233.50", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [318.041015625, 440.8002014160156, 390.6849365234375, 447.92901611328125], "spans": [[2, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [441.9682312011719, 440.8002014160156, 465.16064453125, 447.92901611328125], "spans": [[2, 1]], "text": "105.10", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [318.0412902832031, 427.264892578125, 390.6543273925781, 434.3937072753906], "spans": [[3, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [441.9408874511719, 427.264892578125, 465.1816711425781, 434.3937072753906], "spans": [[3, 1]], "text": "300 00 300.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [318.0412902832031, 413.7298889160156, 390.69854736328125, 420.85870361328125], 
"spans": [[4, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [435.6726379394531, 413.7298889160156, 465.1684265136719, 420.85870361328125], "spans": [[4, 1]], "text": "1,775.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [318.0412902832031, 400.22265625, 390.6852111816406, 407.3514709472656], "spans": [[5, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [441.968505859375, 400.22265625, 465.1609191894531, 407.3514709472656], "spans": [[5, 1]], "text": "601.70", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}], [{"bbox": [318.0412902832031, 386.6877136230469, 390.6852111816406, 393.8165283203125], "spans": [[6, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [6, 7]}, {"bbox": [446.19329833984375, 386.6877136230469, 465.16595458984375, 393.8165283203125], "spans": [[6, 1]], "text": "37.80", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7]}], [{"bbox": [318.0412902832031, 373.1529541015625, 390.6852111816406, 380.2817687988281], "spans": [[7, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [7, 8]}, {"bbox": [441.968505859375, 373.1529541015625, 465.1609191894531, 380.2817687988281], "spans": [[7, 1]], "text": "490.45", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8]}], [{"bbox": [318.0412902832031, 359.6453857421875, 390.6678771972656, 366.7745361328125], 
"spans": [[8, 0]], "text": "**** **** **** 1234 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [8, 9]}, {"bbox": [437.7164001464844, 359.6453857421875, 465.1811218261719, 366.7742004394531], "spans": [[8, 1]], "text": "1005.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9]}], [{"bbox": [318.0412902832031, 346.11041259765625, 390.6852111816406, 353.2392272949219], "spans": [[9, 0]], "text": "**** **** **** 1234", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [9, 10]}, {"bbox": [441.968505859375, 346.11041259765625, 465.1609191894531, 353.2392272949219], "spans": [[9, 1]], "text": "750.33", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10]}], [{"bbox": [318.0412902832031, 332.5756530761719, 390.6852111816406, 339.7044677734375], "spans": [[10, 0]], "text": "**** **** **** 0001", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 10, "row-header": false, "row-span": [10, 11]}, {"bbox": [446.19329833984375, 332.5756530761719, 465.16595458984375, 339.7044677734375], "spans": [[10, 1]], "text": "10.00", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 10, "row-header": false, "row-span": [10, 11]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 
612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}, {"height": 792.0, "page": 17, "width": 612.0}, {"height": 792.0, "page": 18, "width": 612.0}, {"height": 792.0, "page": 19, "width": 612.0}, {"height": 792.0, "page": 20, "width": 612.0}, {"height": 792.0, "page": 21, "width": 612.0}, {"height": 792.0, "page": 22, "width": 612.0}, {"height": 792.0, "page": 23, "width": 612.0}, {"height": 792.0, "page": 24, "width": 612.0}, {"height": 792.0, "page": 25, "width": 612.0}, {"height": 792.0, "page": 26, "width": 612.0}, {"height": 792.0, "page": 27, "width": 612.0}, {"height": 792.0, "page": 28, "width": 612.0}, {"height": 792.0, "page": 29, "width": 612.0}, {"height": 792.0, "page": 30, "width": 612.0}, {"height": 792.0, "page": 31, "width": 612.0}, {"height": 792.0, "page": 32, "width": 612.0}, {"height": 792.0, "page": 33, "width": 612.0}, {"height": 792.0, "page": 34, "width": 612.0}, {"height": 792.0, "page": 35, "width": 612.0}, {"height": 792.0, "page": 36, "width": 612.0}, {"height": 792.0, "page": 37, "width": 612.0}, {"height": 792.0, "page": 38, "width": 612.0}, {"height": 792.0, "page": 39, "width": 612.0}, {"height": 792.0, "page": 40, "width": 612.0}, {"height": 792.0, "page": 41, "width": 612.0}, {"height": 792.0, "page": 42, "width": 612.0}, {"height": 792.0, "page": 43, "width": 612.0}, {"height": 792.0, "page": 44, "width": 612.0}, {"height": 792.0, "page": 45, "width": 612.0}, {"height": 792.0, "page": 46, "width": 612.0}, {"height": 792.0, "page": 47, "width": 612.0}, {"height": 792.0, "page": 48, "width": 612.0}, {"height": 792.0, "page": 49, "width": 612.0}, {"height": 792.0, "page": 50, "width": 612.0}, {"height": 792.0, "page": 51, "width": 612.0}, {"height": 792.0, "page": 52, "width": 612.0}, {"height": 792.0, 
"page": 53, "width": 612.0}, {"height": 792.0, "page": 54, "width": 612.0}, {"height": 792.0, "page": 55, "width": 612.0}, {"height": 792.0, "page": 56, "width": 612.0}, {"height": 792.0, "page": 57, "width": 612.0}, {"height": 792.0, "page": 58, "width": 612.0}, {"height": 792.0, "page": 59, "width": 612.0}, {"height": 792.0, "page": 60, "width": 612.0}, {"height": 792.0, "page": 61, "width": 612.0}, {"height": 792.0, "page": 62, "width": 612.0}, {"height": 792.0, "page": 63, "width": 612.0}, {"height": 792.0, "page": 64, "width": 612.0}, {"height": 792.0, "page": 65, "width": 612.0}, {"height": 792.0, "page": 66, "width": 612.0}, {"height": 792.0, "page": 67, "width": 612.0}, {"height": 792.0, "page": 68, "width": 612.0}, {"height": 792.0, "page": 69, "width": 612.0}, {"height": 792.0, "page": 70, "width": 612.0}, {"height": 792.0, "page": 71, "width": 612.0}, {"height": 792.0, "page": 72, "width": 612.0}, {"height": 792.0, "page": 73, "width": 612.0}, {"height": 792.0, "page": 74, "width": 612.0}, {"height": 792.0, "page": 75, "width": 612.0}, {"height": 792.0, "page": 76, "width": 612.0}, {"height": 792.0, "page": 77, "width": 612.0}, {"height": 792.0, "page": 78, "width": 612.0}, {"height": 792.0, "page": 79, "width": 612.0}, {"height": 792.0, "page": 80, "width": 612.0}, {"height": 792.0, "page": 81, "width": 612.0}, {"height": 792.0, "page": 82, "width": 612.0}, {"height": 792.0, "page": 83, "width": 612.0}, {"height": 792.0, "page": 84, "width": 612.0}, {"height": 792.0, "page": 85, "width": 612.0}, {"height": 792.0, "page": 86, "width": 612.0}, {"height": 792.0, "page": 87, "width": 612.0}, {"height": 792.0, "page": 88, "width": 612.0}, {"height": 792.0, "page": 89, "width": 612.0}, {"height": 792.0, "page": 90, "width": 612.0}, {"height": 792.0, "page": 91, "width": 612.0}, {"height": 792.0, "page": 92, "width": 612.0}, {"height": 792.0, "page": 93, "width": 612.0}, {"height": 792.0, "page": 94, "width": 612.0}, {"height": 792.0, "page": 95, "width": 
612.0}, {"height": 792.0, "page": 96, "width": 612.0}, {"height": 792.0, "page": 97, "width": 612.0}, {"height": 792.0, "page": 98, "width": 612.0}, {"height": 792.0, "page": 99, "width": 612.0}, {"height": 792.0, "page": 100, "width": 612.0}, {"height": 792.0, "page": 101, "width": 612.0}, {"height": 792.0, "page": 102, "width": 612.0}, {"height": 792.0, "page": 103, "width": 612.0}, {"height": 792.0, "page": 104, "width": 612.0}, {"height": 792.0, "page": 105, "width": 612.0}, {"height": 792.0, "page": 106, "width": 612.0}, {"height": 792.0, "page": 107, "width": 612.0}, {"height": 792.0, "page": 108, "width": 612.0}, {"height": 792.0, "page": 109, "width": 612.0}, {"height": 792.0, "page": 110, "width": 612.0}, {"height": 792.0, "page": 111, "width": 612.0}, {"height": 792.0, "page": 112, "width": 612.0}, {"height": 792.0, "page": 113, "width": 612.0}, {"height": 792.0, "page": 114, "width": 612.0}, {"height": 792.0, "page": 115, "width": 612.0}, {"height": 792.0, "page": 116, "width": 612.0}, {"height": 792.0, "page": 117, "width": 612.0}, {"height": 792.0, "page": 118, "width": 612.0}, {"height": 792.0, "page": 119, "width": 612.0}, {"height": 792.0, "page": 120, "width": 612.0}, {"height": 792.0, "page": 121, "width": 612.0}, {"height": 792.0, "page": 122, "width": 612.0}, {"height": 792.0, "page": 123, "width": 612.0}, {"height": 792.0, "page": 124, "width": 612.0}, {"height": 792.0, "page": 125, "width": 612.0}, {"height": 792.0, "page": 126, "width": 612.0}, {"height": 792.0, "page": 127, "width": 612.0}, {"height": 792.0, "page": 128, "width": 612.0}, {"height": 792.0, "page": 129, "width": 612.0}, {"height": 792.0, "page": 130, "width": 612.0}, {"height": 792.0, "page": 131, "width": 612.0}, {"height": 792.0, "page": 132, "width": 612.0}, {"height": 792.0, "page": 133, "width": 612.0}, {"height": 792.0, "page": 134, "width": 612.0}, {"height": 792.0, "page": 135, "width": 612.0}, {"height": 792.0, "page": 136, "width": 612.0}, {"height": 792.0, "page": 
137, "width": 612.0}, {"height": 792.0, "page": 138, "width": 612.0}, {"height": 792.0, "page": 139, "width": 612.0}, {"height": 792.0, "page": 140, "width": 612.0}, {"height": 792.0, "page": 141, "width": 612.0}, {"height": 792.0, "page": 142, "width": 612.0}, {"height": 792.0, "page": 143, "width": 612.0}, {"height": 792.0, "page": 144, "width": 612.0}, {"height": 792.0, "page": 145, "width": 612.0}, {"height": 792.0, "page": 146, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/redp5695.doctags.txt b/tests/data/redp5695.doctags.txt new file mode 100644 index 00000000..cc059e5f --- /dev/null +++ b/tests/data/redp5695.doctags.txt @@ -0,0 +1,443 @@ + +Front cover +
+ +
+IBM Cloud Pak for Data on IBM Z +
+ +
+
+ +
+Executive overview +Most industries are susceptible to fraud, which poses a risk to both businesses and consumers. According to The National Health Care Anti-Fraud Association, health care fraud alone causes the nation around $68 billion annually.$^{1}$ This statistic does not include the numerous other industries where fraudulent activities occur daily. In addition, the growing amount of data that enterprises own makes it difficult for them to detect fraud. Businesses can benefit by using an analytical platform to fully integrate their data with artificial intelligence (AI) technology. +With IBM Cloud Pakfi for Data on IBM Z, enterprises can modernize their data infrastructure, develop, and deploy machine learning (ML) and AI models, and instantiate highly efficient analytics deployment on IBM LinuxONE. Enterprises can create cutting-edge, intelligent, and interactive applications with embedded AI, colocate data with commercial applications, and use AI to make inferences. +This IBM Redguide publication presents a high-level overview of IBM Z. It describes IBM Cloud Pak for Data (CP4D) on IBM Z and IBM LinuxONE, the different features that are supported on the platform, and how the associated features can help enterprise customers in building AI and ML models by using core transactional data, which results in decreased latency and increased throughput. +This publication highlights real-time CP4D on IBM Z use cases. Real-time Clearing and Settlement Transactions, Trustworthy AI and its Role in Day-To-Day Monitoring, and the Prevention of Retail Crimes are use cases that are described in this publication. Using CP4D on IBM Z and LinuxONE, this publication shows how businesses can implement a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation. +IBM Z: An overview +Ever wonder how many transactions a bank processes per day? 
What about the pace at which these transactions happen? According to an IBMfi report, 44 of 50 of the world's top banks use IBM Z mainframes for these daily transactions.$^{2}$ IBM Z is a platform that is designed for voluminous data, maximum security, real-time transaction analysis, and cost efficiency. +The most recent platform for IBM Z is IBM z16™. The IBM z16 supports the following features: +GLYPH On-chip AI acceleration +GLYPH Quantum-safe crypto discovery +GLYPH Simplified compliance +GLYPH Flexible capacity +GLYPH Modernization of applications +GLYPH Sustainability +With these features, enterprises can upgrade applications while preserving secure and resilient data. +To learn more about these features, see the IBM z16 product page. +Figure 1 on page 3 shows a picture of the IBM z16 mainframe. +Figure 1 IBM z16 +
+ +Figure 1 IBM z16 +
+IBM z16 and IBM LinuxONE Emperor 4 features +IBM Z are based on enterprise mainframe technology. Starting with transaction-based workloads and databases, IBM Z has undergone tremendous transformations in its system design for many generations to build servers that cater to Linux-based workloads and security with a cyberresilient system, and support quantum computing and modernization by using a hybrid cloud with a focus on data and AI. +Figure 2 provides a snapshot of the IBM Z processor roadmap, which depicts the journey of transformation and improvement. +Figure 2 IBM Z: Processor roadmap +
+ +Figure 2 IBM Z: Processor roadmap +
+The IBM z16 and IBM LinuxONE Emperor 4 are the latest of the IBM Z, and they are developed with a 'built to build' focus to provide a powerful, cyberresilient, open, and secure platform for business with an extra focus on sustainability to help build sustainable data centers. Although the z16 server can host both IBM z/OSfi and Linux workloads, LinuxONE Emperor 4 is built to host Linux only workloads with a focus on consolidation and resiliency. Depending on the workload, consolidation from numerous x86 servers into a LinuxONE Emperor 4 can help reduce energy consumption by 75% and data center floor space by 50%, which helps to achieve the sustainability goals of the organization. +Figure 3 on page 5 shows a summary of the system design of IBM LinuxONE Emperor 4 with the IBM Telum™ processor. The IBM Telum processor chip is designed to run enterprise applications efficiently where their data resides to embed AI with super low latency. The support for higher bandwidth and I/O rates is supported through FCP Express cards with an endpoint security solution. The memory subsystem supports up to 40 TB of memory. +Figure 3 System design of IBM z16 LinuxONE Emperor 4 +
+ +Figure 3 System design of IBM z16 LinuxONE Emperor 4 +
+The IBM z16 and IBM LinuxONE Emperor 4 servers are built with 7-nm technology at a 5.2 GHz speed. They consist of four dual-chip modules (DCMs) per central processor complex (CPC) drawer, each of which is built with two 8-core Telum processor chips that has "first in the industry" on-chip acceleration for mid-transaction, real-time AI inferencing, which supports many different use cases, including fraud detection. +Each core has access to a huge private 32 MB L2 cache where up to 16 MB of the L2 cache of an inactive core can be used as virtual cache (L3 / L4) by neighboring active cores on the chip. This cache helps address translation and access checking by prefetching the same virtual cache into the L2 cache. The virtual cache also includes Neural Network Processing Assist instructions and direct memory access with protection, and per chip GZIP compression. +Figure 4 provides more information about the features of AI Accelerator integration with the IBM Z processor cores. +Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores +
+ +Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores +
+The IBM z16 and IBM LinuxONE Emperor 4 server platforms are built with the hardware features that are shown in Figure 4 with addressing data and AI workloads in mind. Regardless of where the ML and deep learning (DL) frameworks are used to build and train data and AI models, the inferencing on existing enterprise application data can happen along currently running enterprise business applications. CP4D 4.6 supports Tensorflow and IBM Snap ML frameworks, which are optimized to use the on-chip AI Accelerator during inferencing. Support for various other frameworks is planned for future releases. +Figure 5 on page 7 shows the seamless integration of AI into existing enterprises workloads on the IBM z16 while leveraging the underlying hardware capabilities. +Figure 5 Seamless integration +
+ +Figure 5 Seamless integration +
+What is Cloud Pak for Data on IBM Z +IBM Cloud Pak for Data allows enterprises to simplify, unify, and automate the delivery of data and AI. It categorizes the activities within the journey to AI as four rungs of the AI Ladder: Collect, Organize, Analyze, and Infuse. For more information about each of the AI Ladder rungs, see Become Data Driven with IBM Z Infused Data Fabric , REDP-5680. +CP4D on IBM Z provides enterprises with a resilient and secure private cloud platform. You can use it to create ML and AI models that may be included into modern intelligent applications. You also can use it to use and construct applications for mission-critical data. With CP4D on IBM Z, enterprises can lower data movement latency, cost inefficiencies, and potential security exposures. Enterprises can safely store and access their most important company data, and leverage their current infrastructure by using cutting-edge hybrid cloud applications. Enterprises can combine their current database applications without any rewrites, which results in reduced cost and complexity. Lastly, by using CP4D on IBM Z, enterprises can update their database infrastructure to benefit from easier management, a quicker time to value, and lower operating expenses. +Figure 6 shows a solution overview of CP4D. The infrastructure alternatives are shown at the bottom, and they include IBM Z and LinuxONE. They all leverage Red Hat OpenShift. Common Foundational Services come next, which offer clarity throughout the data and AI lifecycle, that is, from user access management to monitoring and service provisioning. A high-level view of the services is shown in the middle section. The services have several different capabilities that span the AI hierarchy. The platform can be expanded, and it offers a seamless user experience for all distinct personas across the AI lifecycle, from data gathering through AI infusion. +Figure 6 Solution overview of Cloud Pak for Data +
+ +Figure 6 Solution overview of Cloud Pak for Data +
+We highlight the four main pillars that make IBM Z the correct infrastructure for CP4D: +GLYPH Performance and Scale +GLYPH Embedded Accelerators +GLYPH Reliability and Availability +GLYPH Security and Governance. +From a performance perspective, CP4D on IBM Z provides your data and AI with high transaction processing and a powerful infrastructure. From the embedded accelerators perspective, CP4D on IBM Z can investigate each transaction thanks to a cutting-edge DL inference technology even in the most demanding, sensitive, and latency-prone real-time workloads. From a reliability perspective, CP4D on IBM Z provides high availability and resiliency. Lastly from the security perspective, CP4D on IBM Z is suitable for protecting sensitive data and AI models for enterprises in highly regulated industries or those industries that are worried about security. +Cloud Pak for Data capabilities on IBM Z and IBM LinuxONE +With CP4D on IBM Z and IBM LinuxONE, users can develop, train, and deploy AI and ML models. Users can accomplish this task by using the CP4D IBM Watsonfi Studio and IBM Watson Machine Learning (WLM) services. By using these two fundamental services, users can accomplish the following tasks: +GLYPH Provision various containerized databases. +GLYPH Explore, clean, shape, and alter data by using Data Refinery. +GLYPH Use project-specific data that is uploaded, or connect to distant data. +GLYPH Create Spark run times and applications. +GLYPH Create, build, evaluate, and deploy analytics and ML models with trust and transparency. +GLYPH Leverage the AI Integrated Accelerator for TensorFlow 2.7.2 and Snap ML 1.9. +For more information about the specifics of these capabilities, see Capabilities on Linux on IBM Z and IBM LinuxONE. +Open-source ecosystem +These days, innovation and product development are not limited to closed doors within an organization. 
In any industry sector, the solutions include a mix of proprietary code addressing the core business solution that is supported or integrated into other software components from open source. In some cases, enterprises business solutions also are built from open-source community offerings. Thus, open-source software becomes an important ingredient in modern-day solution building. +IBM actively participates in various open-source communities as part of steering boards defining the roadmap of the community, and also in contributing code to make the community a better place for everyone to participate. Red Hat also actively participates in various open-source communities and makes extensive contributions. In open-source communities, although most open-source development happens on x86 / amd64 or the Intel architecture, the same open-source software is used by other architectures, such as IBM Power (ppc64le), IBM Z and IBM LInuxONE (s390x), ARM, and Sparc. So, the availability of an open-source ecosystem on any architecture is key and critical to business. +On IBM Z and IBM LinuxONE (s390x) architecture, there is a huge open-source support ecosystem that ranges from operating systems such as Linux; application run times; cloud and container services; DevOps and automation; big data; observability; analytics; databases; and storage. The ecosystem on IBM Z and IBM LinuxONE is growing. +IBM Z and IBM LinuxONE include much open-source software in their ecosystem. You can see the growing list of open-source software for IBM Z and LinuxONE at The Growing Ecosystem of Open-Source Software for IBM Z and LinuxONE. +IBM Z and IBM LinuxONE are available to various communities to include support for s390x builds as part of their community's continuous integration and continuous delivery (CI/CD). Also, for open-source community developers, infrastructure resources are available on a no-charge basis through the IBM LinuxONE community cloud. 
+CP4D includes a mix of open-source and proprietary data and AI runtime databases; open-source run times like Python; open-source data platforms like Anaconda; ML and DL frameworks like PyTorch and TensorFlow; and thousands of reusable Python packages. All of them are available and supported on s390x architecture to provide seamless parity with x86 architecture and a seamless experience for enterprise data scientists, architects, and data and AI solution developers on IBM Z and IBM LinuxONE platforms. +Anaconda is one of the open-source data platforms that provide Python and R based data science ML frameworks; analytics and data visualization tools; and open-source data science tools and libraries like Conda, XGBoost, and SciKit-Learn. Anaconda runs natively on Linux on IBM Z and IBM LinuxONE, and on IBM z/OS Container Extensions (zcX) on z/OS. For more information, see Announcing Anaconda for Linux on IBM Z and LinuxONE. +In addition to strong, open-source ecosystem support for application development on Linux and enterprise operating systems, a new generation of IBM Z and IBM LinuxONE servers (IBM z16™) also have strong platform support, and AI acceleration capabilities that can be leveraged by open-source software to perform better on the server infrastructure. For example, the recently released CP4D 4.6 has TensorFlow and IBM Snap ML frameworks that leverage the AI accelerators when running on an IBM z16 server. +So, to summarize, there is a huge, growing data and AI open source ecosystem that is supported and optimized on IBM Z and IBM LinuxONE servers. +Why AI on IBM Z +Data and AI are playing a major role in the modernization story to enable the digital transformation journey of every organization. Many organizations recognize the business value of infusing AI into their infrastructure. CP4D provides the cloud-native solution to put your data to work.
With CP4D, all your data users can collaborate from a single, unified interface that supports many services that work together, including collecting data, organizing the data, analyzing the data, and infusing AI. +Traditional ML models power most of today's ML applications in business and among AI practitioners. CP4D supports traditional ML frameworks for training and inferencing, such as Scikit-learn, Snap ML, and XGBoost. Snap ML is a library that provides high-speed training and inferencing of ML models that leverage the AI accelerator while running on an IBM z16 (Linux on IBM Z). CP4D supports DL frameworks such as TensorFlow and PyTorch. TensorFlow is a DL framework that leverages the AI accelerator while running on an IBM z16 (Linux on IBM Z). +Figure 7 on page 11 provides an overview of the components that are supported on CP4D on IBM Z. You can leverage Watson Studio for model building, training, and validation, and WML for deployment of the model. Eventually, applications can use the AI inference endpoint to score the model. +Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE +
+ +Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE +
+In summary, here are some of the reasons why you should choose AI on IBM Z: +GLYPH World-class AI inference platform for enterprise workloads: +-Embedded accelerators: A centralized on-chip AI accelerator that is shared by all cores. +-Industry standard AI ecosystem: Many industry open-source data science frameworks are available on the platform. +-Seamlessly integrate AI into existing enterprise workload stacks: Train anywhere, and then deploy on IBM Z. +GLYPH Security: Encrypted memory, and improved trusted execution environments. +GLYPH Sustainability: Reduce your energy consumption with real-time monitoring tools about the energy consumption of the system. +AI use cases +With billions of transactions per day in many of today's industries, it is key to get real-time insights about what is happening in your data. AI on the IBM Z stack understands these situations, and it delivers in-transaction inference in real time and at scale. +Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments. +For insurance companies, a pressing use case would be claims processing. For markets and trading, clearing and settlement use cases are paramount. +For the health care industry, medical image processing (such as MRIs and x-rays), skin cancer detection, and patient monitoring activities such as infant motion analysis, is important. +For the airline industry, processes such as air traffic management, flight management systems, and flight maintenance predictions are use cases that are ideal candidates for using AI on IBM Z. 
+In the following sections, we describe the following use cases: +GLYPH "Use case 1: Responsible AI augmented with risk and regulatory compliance" on page 12 AI model lifecycle governance, risk management, and regulatory compliance are key to the success of the enterprises. It is imperative to adopt a typical AI model lifecycle to protect new end-to-end risks. +GLYPH "Use case 2: Credit default risk assessment" on page 22 +Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments. +GLYPH "Use case 3: Clearing and settlement" on page 25 +The use of AI can help to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. +GLYPH "Use case 4: Remaining Useful Life of an aircraft engine" on page 27 We describe how AI can help to avoid unplanned aircraft downtime by determining the remaining time or cycles that an aircraft engine is likely to operate before failure. +GLYPH "Use case 5: AI-powered video analytics on an infant's motions for health prediction" on page 30 +In this section, we describe how AI can predict an infant's health conditions by monitoring real-time body movements. +Use case 1: Responsible AI augmented with risk and regulatory compliance +Advancement in AI is changing the world, and organizations must adopt AI to embrace new challenges daily. Many enterprises see tremendous value in adopting AI and ML technologies while establishing organization trust in the models, underlying data, and the process to be followed. An AI model lifecycle can be a daunting task. +How mature is your AI governance? In this section, we provide a use case demonstrating the trustworthiness of AI and its importance in daily monitoring. 
+Industry challenges +Here are the three main reasons why organizations struggle with the adoption of AI: +GLYPH Scaling with growing regulations +GLYPH Lack of confidence in operationalized AI (making responsible AI) +GLYPH Challenges around managing the risk throughout the entire AI workflow +Scaling with growing regulations +Laws and regulations in the data and AI space are accelerating, and many countries are proposing strict AI policies. Countries are monitoring adherence to these policies by the enterprises and imposing fines for any violations. Responding to these regulations is challenging for global organizations where multiple regulations apply. For enterprises, it is important to adopt AI policies when there is change, and to validate explainable models to protect against discrimination. +Responsible AI +Responsible AI protects against loss of data privacy, and reduced customer loyalty and trust. A data scientist cannot maximize accuracy and model performance above all other concerns. Practicing responsible AI is a best practice, and you must establish protection and validation to ensure that any models that are placed into production are fair and explainable. +Risks throughout the entire AI workflow +Organizations need to mitigate risk of the following items: +GLYPH Deciding not to use certain technologies or practices +GLYPH Using personal information when needed and with a user's consent +GLYPH Ensuring automated decisions are free from bias +GLYPH Customer confidence by providing explanations for business decisions +GLYPH Fraud to the organization and to customers' accounts +GLYPH Delays in putting models into production +In fact, in a recent survey, these concerns were echoed by real AI adopters when asked what aspects of trust are most important to them. Although explaining how AI decides is the primary concern, all of these concerns are important.
+The key point here is that risk exists throughout the entire AI lifecycle starting with the underlying data and the business justification behind the "why" of the project and continuing into production. Without a formalized process, there is no way to mitigate these risks to unlock the scale that is required to make automated decisions profitable. With these decisions, the business can operate proactively instead of reactively. +For example, a business can start testing a model before production for fairness metrics. For this task, enterprises need an end-to-end workflow with approvals to mitigate these risks and increase the scale of AI investments, as shown in Figure 8, which presents a typical AI model lifecycle in an enterprise. +Figure 8 Typical AI model lifecycle +
+ +Figure 8 Typical AI model lifecycle +
+Due to regulations, more stakeholders adopt the typical AI model lifecycle to protect their brand from new end-to-end risks. To ensure various aspects of both regulatory compliance and security, the personas that must be involved include the chief financial officer (CFO), chief marketing officer (CMO), chief data officer (CDO), HR, and chief regulatory officer (CRO), along with the data engineers, data scientists, and business analysts, who build AI workflows. +IBM governance solution for IBM Z +AI model lifecycle governance, risk management, and regulatory compliance are key to the success of enterprises. +AI governance is a comprehensive framework that uses a set of automated processes, methodologies, and tools to manage an organization's use of AI. Consistent principles guiding the design, development, deployment, and monitoring of models are critical in driving responsible and trustworthy AI. AI governance includes processes that trace and record the origin of data, models (including associated metadata), and pipelines for audits. The details of entry should include the techniques that trained each model, the hyperparameters that were used, and the metrics from testing phases. These details provide increased transparency into the model's behavior throughout the lifecycle, the data that was influential in its development, and the possible risks. +In a world where trust, transparency and explainable AI matters, every organization wants compliance along with the comfort of understanding how analytic insights and decisions are made. The following sections describe some of the principles and organizational requirements for AI governance. +Lifecycle governance +Lifecycle governance helps you manage your business information throughout its lifecycle, that is, from creation to deletion. IBM AI governance addresses the problems that challenge records managements: +GLYPH Monitor, catalog, and govern AI models from anywhere throughout the AI lifecycle. 
+GLYPH Automate the capture of model metadata for report generation. +GLYPH Drive transparent and explainable AI at scale. +GLYPH Increase accuracy of predictions by identifying how AI is used and where it is lagging. +Risk management +Risk management is used in IBM AI governance to identify, manage, monitor, and report on risk and compliance initiatives at scale: +GLYPH Automate facts and workflow management to comply with business standards. +GLYPH Use dynamic dashboards for clear and concise customizable results. +GLYPH Enhanced collaboration across multiple regions and geographies. +Regulatory compliance +Regulatory compliance is a set of rules that organizations must follow to protect sensitive information and ensure human safety. Any business that works with digital assets, consumer data, health regulations, employee safety, and private communications is subject to regulatory compliance.$^{3}$ The IBM AI governance solution for IBM Z includes the following tasks: +GLYPH Help adhere to external AI regulations for audit and compliance. +GLYPH Convert external AI regulations into policies for automatic enforcement. +GLYPH Use dynamic dashboards for compliance status across policies and regulations. +Enterprises can develop AI models and deploy them by using IBM Watson Studio or WML on CP4D on Red Hat OpenShift on a virtual machine that is based on IBM z/VM or Red Hat Enterprise Linux KVM on IBM Z. AI governance on IBM LinuxONE is supported in the following two ways: +GLYPH Monitor the AI models with Watson OpenScale on CP4D on Red Hat OpenShift on a virtual machine on IBM Z. +GLYPH Enterprises can develop AI models by creating and training models by using Watson Studio and development tools such as Jupyter Notebook or JupyterLab, and then deploying the model onto WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z. 
Then, these enterprises can achieve end-to-end AI governance by running AI Factsheets, IBM Watson OpenScale, and IBM Watson OpenPages® on CP4D on x86. +Figure 9 on page 16 shows the end-to-end flow for a remote AI governance solution. +Figure 9 Remote AI governance solution end-to-end flow +
+ +Figure 9 Remote AI governance solution end-to-end flow +
+To achieve end-to-end AI governance, complete the following steps: +1. Create a model entry in IBM OpenPages by using CP4D on a x86 platform, as shown in Figure 10. +Figure 10 Creating a model entry in IBM OpenPages +
+ +Figure 10 Creating a model entry in IBM OpenPages +
+2. Train a model by using Watson Studio and by using development tools such as Jupyter Notebook or JupyterLab on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 11. +Figure 11 Training an AI model by using Watson Studio +
+ +Figure 11 Training an AI model by using Watson Studio +
+3. Deploy the model by using WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 12. +Figure 12 Deploying an AI model by using WML on Cloud Pak for Data +
+ +Figure 12 Deploying an AI model by using WML on Cloud Pak for Data +
+4. Track the external model lifecycle by browsing through the Catalogs/Platform assets catalog by using AI Factsheets and OpenPages while using CP4D on an x86 platform, as shown in Figure 13. The external model (deployed on CP4D on Red Hat OpenShift on a virtual machine on IBM Z) is saved as a platform asset catalog on the x86 platform. +Figure 13 External model +
+ +Figure 13 External model +
+You can track the model through each stage of the model lifecycle, as shown in Figure 14, by using AI Factsheets and OpenPages. +Figure 14 Tracking the model +
+ +Figure 14 Tracking the model +
+You can see that the model facts are tracked and synchronized to IBM OpenPages for risk management, as shown in Figure 15. +Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform +
+ +Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform +
+5. Create an external model by using IBM OpenScale on the x86 platform, as shown in Figure 16. +Figure 16 Creating an external model on an x86 platform +
+ +Figure 16 Creating an external model on an x86 platform +
+IBM OpenScale provides a comprehensive dashboard that tracks fairness, quality monitoring, drift, and explainability of a model. Fairness determines whether your model produces biased outcomes. Quality determines how well your model predicts outcomes. Drift is the degradation of predictive performance over time. A sample is shown in Figure 17 on page 21. +Figure 17 IBM OpenScale dashboard that is used to monitor the external model +
+ +Figure 17 IBM OpenScale dashboard that is used to monitor the external model +
+You developed and deployed the AI model by using Watson Studio, WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, and end-to-end AI model governance by leveraging AI Factsheets, OpenScale, and OpenPages on CP4D on a x86 platform. Figure 18 shows end-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale. +Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale +
+ +Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale +
+Use case 2: Credit default risk assessment +In today's world, many individuals or businesses seeking loans to meet their growing business needs often look to financial institutions. Financial institutions can offer loans to individuals or businesses and charge interest based on the current market situations. +Industry challenges +Financial institutions must make an accurate decision about whether to sanction a loan or not, and judging the likelihood of default is the difference between a successful and unsuccessful loan portfolio. In a traditional scenario, an experienced banker can judge someone's likelihood of default, but that is not an efficient method for judgment as a business grows. +Predictions of credit default risk assessment +In the modern world, growing business institutions can no longer rely on only experienced bankers to decide whether to sanction a loan knowing that there is a probability that the borrower might default on their loans. A better choice is to rely on technological advancements that can help with reasoning based on facts, such as leveraging credit risk modeling techniques to process the historical data of past borrowers to understand their credit behavior and make a more informed decision about whether to lend money, how much money, and decide on the tenure to close the loan. +Financial institutions can leverage AI solutions by using ML techniques to predict the credit risk. Applying AI to credit risk modeling techniques can benefit institutions in decision-making, and thus can help better manage the exposure to credit risk. +Figure 19 on page 23 shows a sample architecture about how to design and develop an AI model for credit risk assessment on IBM Z. An IBM WebSphere® Application Server is used for handling in-bound transactions, and CP4D is used for AI model lifecycle management that includes building, training, and deploying the model. +Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z +
+ +Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z +
+A data scientist can leverage Watson Studio to develop and train an AI model and WML to deploy and score the model. In this sample architecture, the WML Python run time leverages the ML framework, IBM Snap Machine Learning (Snap ML), for scoring, and can leverage an integrated AI accelerator at the time of model import. +Then, the banking loan approval team can send a loan applicant request to the IBM WebSphere Application Server, which can make a request to the AI inference endpoint. The AI inference engine scores the transaction and sends the result back to the loan approval team. Based on the results, the approval team can decide on whether to approve a loan or not, and also decide how much they can lend, timelines, and other factors. +The transaction system that is shown in Figure 19 uses IBM WebSphere Liberty as an application server, but you also can use an IBM Open Liberty® application server or any application server that can send RESTful API communications. +Models are frequently developed and tested in many platforms and languages, such as Python, Scala, R, and Go. Models can leverage ML frameworks like scikit-learn, Snap ML, or XGBoost, or DL frameworks like TensorFlow or PyTorch. Training a model can be done on any platform if you have enough computing power for complex models, but moving that model into production requires careful testing to ensure that transactions are not delayed, especially if you plan to run the model within a transaction. +We showed how IBM Z enables customers to use AI frameworks to detect credit risk. Now, we look at how you can leverage CP4D and TensorFlow on IBM Z to detect the credit risk. +Figure 20 shows an architecture for predicting credit risk by using DL on IBM Z. +Figure 20 Architecture for credit risk prediction by using DL on IBM Z +
+ +Figure 20 Architecture for credit risk prediction by using DL on IBM Z +
+Data scientists can start creating and training a DL AI model by using a Jupyter Notebook instance and Watson Studio. Then, they can deploy the model by using WML on CP4D running on IBM Z, which provides an endpoint. Other applications, including the IBM WebSphere server, can produce credit risk results by using the model's endpoint. +In summary, here are some considerations for developing real-time AI models, such as credit risk assessment: +GLYPH A preference for in-platform run times of the model, such as faster execution results. +GLYPH Less overhead in the end-to-end flows might improve scoring time. +GLYPH If you are using models that are not deployable, CP4D offers a custom Python run time to build your own stack if they are not available on the platform. +GLYPH AI inferencing based on ML or DL models can increase the accuracy of better credit risk assessment. +GLYPH Using IBM z16 and on-chip AI acceleration with the Telum chip that is embedded with regular Integrated Facility for Linux (IFLs) provides an execution speed for your transactions that cannot be achieved by other means. +Use case 3: Clearing and settlement +Clearing and settlements involve banks or financial institutions sending and receiving wire transfers by using secure interbank payments networks that can clear or settle numerous transactions. When an individual or business entity initiates a wire transfer, clearing begins the fund delivery process. Banks can begin the settlement phase either immediately after clearing takes place or later, mostly at the end of the business day. +Industry challenge +Banks and financial institutions must deal with high-risk transactions that can lead to loss. Moreover, these transactions can lead to regulatory violations and extra compliance costs. +Clearing and settlement solution +Use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. 
The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts. +In financial institutions, finding which financial transactions are legitimate and which transactions are fraudulent is of paramount importance. In this section, we go through a use case where we use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts to financial institutions. +The goal is to predict in real time whether the transaction being processed might be a fraudulent transaction or not. To achieve this goal, we build an ML model that can do this prediction for the financial institution. Because there would be many transactions being processed at any point by the financial institution, it is important to perform this prediction of fraudulent transactions in near-real time in a few milliseconds. +One possible solution is to build and train a TensorFlow based DL model that learns from the historical data and predicts the fraudulent transactions. CP4D on IBM Z and IBM LinuxONE is a suitable product where this task can be achieved and the model deployed, and coming up with a serving endpoint. +Figure 21 provides a high-level diagram of a clearing and settlement use case for financial transactions that uses CP4D on IBM Z and IBM LinuxONE. +Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data +
+ +Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data +
+Here are the steps of the high-level process flow: +1. Create a connection to a database (for example, an IBM Db2® database) where the historical data will be used for ML model building. +2. Read the data from the database and prepare the data for AI by using the Data Refinery tool in CP4D. +3. A Jupyter Notebook or JupyterLab IDE that is provided by the Watson Studio component in CP4D helps us build and train the AI model. The trained model can be saved into a WML repository. +4. Deploy the saved model into a deployment space for batch deployment. +5. Create a batch deployment by using any of these interfaces: +a. Watson Studio user interface from an Analytics deployment space. +b. WML Python client. +c. WML REST APIs. +6. A hardware configuration can be chosen for the deployment. +7. A batch deployment processes input data from a file, data connection, or connected data in a storage bucket, and writes the output to a selected destination. +8. One way to run batch deployment to predict or score is to create and run a batch deployment job. +9. Provide an input data type: +a. Inline data for entering a JSON format payload. +b. Select Data asset, click Select data source, and then specify your asset. +10. The output data type can be a new output file or a connected data asset. +11. A Kubernetes admin can change the maximum number of concurrent batch jobs that can be run. +12. Get the deployment endpoint URL. For more information, see Getting the deployment endpoint URL. +Summary +With this use case, we attempted to demonstrate how to predict, in real time, whether the transaction that is being processed might be a fraudulent transaction or not. By using the method, you have the following advantages: +GLYPH No impact to SLAs and the batch process window. +GLYPH Proactively stop losses, and lower operational, regulatory, and compliance costs. +GLYPH The solution is using a DL framework like TensorFlow for high-performing, low latency scoring.
+Use case 4: Remaining Useful Life of an aircraft engine +In this use case, we describe how an airline can deploy an AI model for inferencing by using IBM® zSystems. +Remaining Useful Life (RUL) is the remaining time or cycles that an aircraft engine is likely to operate without any failure. In this case, it is the equivalent of the number of flights remaining for the engine after the last flight. By estimating RUL, the operator can decide on the next maintenance schedule and avoid unplanned downtime. +Figure 22 provides an overview of the inferencing architecture for the RUL of an aircraft engine when using IBM Z. +Figure 22 Inferencing architecture on IBM Z +
+ +Figure 22 Inferencing architecture on IBM Z +
+Because we are looking into data-driven model development, the data set of our target is the run-to-failure data of the engine. We are looking into a supervised learning problem, and we use regression techniques to learn from the data. DL techniques such as Long Short-Term Memory (LSTM) or Gated Recurrent Units (GRU) are our choice because we are looking into a time series data set. TensorFlow or PyTorch frameworks are leveraged to create models. AI governance monitors the data and model drift to maintain the model quality throughout the model's life. +Open-source data from NASA was used to build the AI model, which then was deployed on CP4D. CP4D enables the data-scientist's journey from modeling to deployment in a seamless process. Data engineers leverage Db2 to host the data set, which includes the training, testing, and validation of a data set. Since data is hosted on Db2, you can expect low latency while retrieving the data and serve data security needs because Db2 is hosted on the IBM Z platform. Data is fetched by the data refinery to do the necessary pre-processing and data imputations. You can use the programming languages Golang or C++ for real-time predictions, depending on customer needs. For more information about this topic, see "Use case 3: Clearing and settlement" on page 25. +Model building is done on Watson Studio, leveraging the high-performance computing hardware on IBM Z. You can train the model anywhere (on your own hardware or the cloud) and bring the model directly into CP4D, which provides data scientists with the flexibility of implementation choices. +We used LSTM to build the AI model and used the training data. The model was continuously evaluated to model convergence. The final model is tested with the test data, which is never exposed at the time of training to make sure that the model works. +This model is deployed on WML on CP4D and runs on IBM Z. 
If required, the trained model can be converted to the Open Neural Network Exchange (ONNX) format before deployment. Based on project requirements, IBM Z supports high-throughput, low latency inference requirements by leveraging an AI accelerator. +For decision-making about an aircraft engine's life, it is important to be able to explain the model predictions from end to end. This explainability may be global or local. Global explainability enables decision-makers to evaluate the trained model in general from the subject matter expert (SME) point of view. Local explainability enables the operator to validate the reasons behind the present inference and relate it to the past data points, which are an indicative cause of the prediction. +The AI governance components such as IBM OpenScale on CP4D support explainability and manages the drifts in data and concept. OpenPages and AI FactSheet together can alert the stakeholders about important events through a dashboard and allow course correction at any point. +Client-side applications can invoke a REST apiserver that handles some preprocessing of an incoming request before initiating the inference pipeline. Efficiencies might be needed in real-time applications, and inference response time can be reduced by adopting low-level programming while components are communicating. +Figure 23 on page 29 provides a more in-depth view of the architecture of an AI-based predictive maintenance application. +Figure 23 In-depth architectural view +
+ +Figure 23 In-depth architectural view +
+In summary, consider the following points while developing an AI-based predictive maintenance application: +GLYPH CP4D offers a Python run time to build a custom solution stack, but also supports different components like Watson Studio, WML, Db2, Data Refinery, OpenScale, AI Factsheets, and OpenPages. +GLYPH The trustworthiness of the predicted output is important for critical use cases. +GLYPH IBM Z provides high data security and low latency requirements at scale for the critical applications. +GLYPH A data scientist can choose to train the model and deploy it on CP4D seamlessly with the latest tech stack that is available. +GLYPH AIOps and MLOps are supported by CP4D to track the AI model and data lifecycle throughout the application lifecycle. +Use case 5: AI-powered video analytics on an infant's motions for health prediction +Each year, approximately 5 million newborns worldwide are suffering from a neuro-developmental disorder. Due to the lack of early diagnoses and intervention, many infants are disabled and abandoned, especially in countries with limited numbers of pediatricians with extensive experience in neuro-developmental disorders. This situation is a conundrum that plagues many families around the world. +Infant motion analysis is critically important to understanding and comprehending healthy childhood development. In infants, monitoring their poses provides information about their health that can lead to a better prediction of early developmental risk assessment and diagnosis. +Adults use different techniques and methods to express their feelings (like sick, happy, stressed, or hungry), but this case is usually different for infants who cannot express their feelings. Based on the baby movements, AI can predict their expression or health.
+In this use case, we examine how AI-powered video analytics can assist new parents and hospitals by addressing pose-based real-time body movements of the infants (such as arching back, head banging, kicking legs, rubbing eyes, stretching, and sucking fingers). During the initial months of a baby's life, spontaneous movements might indicate later developmental disorders, such as cerebral palsy, Rett syndrome, and autism spectrum disorders. +Industry challenges +There are video surveillance systems that are installed for monitoring an infant's movement in many hospitals or homes so that any problem can be witnessed and potentially even stopped before they take place. These systems require much manual work to monitor the real-stream videos and intervene when a problem is detected. +There is a certain amount of trust that you must place on the person who monitors a surveillance system to ensure that the job is being done effectively and efficiently, and that the surveillance system is being vigilantly watched. Because of the dependency on these manual efforts, you need something "smart" that monitors constantly the surveillance system and detect problems effectively. +AI is shaping the controls of surveillance that can map and track occurrences with self-learning abilities, AI can improve on human operations and analyze video footage in real time to alert the hospitals or parents if any anomalies are identified. +Video processing a stream of data from surveillance systems and then performing advance analytics and detecting anomalies quickly is a significant challenge in the industry. +Infant motion analytics in real time +AI is the current "market trend evolution" in video analytics and advancing the decision-making capabilities of the human mind. DL-based computer vision AI techniques are being widely adopted by various industries to solve real-time problems. 
These techniques improve the detection and prediction accuracy without increasing the hardware cost exponentially. For users, AI greatly reduces the workload of the monitoring staff and provides benefits by detecting unusual incidents and solving many video forensic problems. +S +Figure 24 Architecture for AI-powered video analytics +
+ +Figure 24 Architecture for AI-powered video analytics +
+Live camera feeds or recorded videos of an infant's movement are the inputs for a pose detection model. This video streaming data was stored in IBM Cloud® Object Storage for image processing. Video data must be transformed into frames so that the infant's body poses can be detected. These pose-estimation components of the pipeline predict the location of all 17 person key points with 3 degrees of freedom each (x, y location and visibility) plus two virtual alignment key points. This approach also embraces a compute-intensive heat map prediction of infant body posture. +When changes in body posture or movement happen, analytics can be performed, and a threshold can be set for the angle of the body and posture movements. An analysis can be performed on movement that is based on that threshold to help to predict an infant's health index in the output video stream by leveraging the IBM z16 on-chip AI acceleration, which provides an execution speed in real time on an edge device, which cannot be achieved by other means. +We can leverage the following AI technology stack for this use case: +GLYPH Convolutional neural network: Build an artificial neural network model on video streaming and images. +GLYPH TensorFlow: A DL back-end framework that is based on TensorFlow. +GLYPH Mediapipe: A library that helps with video streaming processing and prediction of human pose estimation. +GLYPH OpenCV: A real-time computer vision library that helps perform image processing. +CP4D was used to build and deploy the AI-powered video analytics on infant's motion for health prediction use case on IBM Z. IBM Z with AI accelerator enables faster inference for detecting face and body movements and performing angle analytics in real time. +Figure 24 shows an architectural diagram about how to design and develop an AI model for real-time body pose detection on IBM Z. 
A deep convolutional neural network architecture was trained on the task of infant pose estimation on the custom data set by leveraging IBM Cloud Pak for Data. +WML was used for deployment of the pose detection model and generated notifications to users with web and mobile applications, and it integrates with Fitbit for push notifications so that hospitals and parents can take preventive actions. +Additional resources +GLYPH The Cloud Pak for Data 4.5 on IBM Z Overview Demo video provides an overview of some of the more important features of CP4D on IBM Z. +GLYPH IBM Cloud Pak for Data Tutorials. +GLYPH Here are some additional use cases that use the data science frameworks that are available as part of CP4D on IBM Z and IBM LinuxONE: +-Payment Card Fraud Detection by using TensorFlow on CP4D on IBM Z and IBM LinuxONE is a payment card fraud detection use case. +-Fashion-MNIST clothing classification with PyTorch on Cloud Pak for Data on IBM Z and IBM LinuxONE is a Fashion-MNIST clothing classification use case. +-Payment Card Fraud Prevention by using Snap ML on IBM Cloud Pak for Data on Red Hat OpenShift on a virtual machine on IBM Z and IBM LinuxONE, which leverage the z16 integrated AI accelerator describes a use case that uses Snap Machine Learning in Cloud Pak for Data on IBM Z and IBM LinuxONE. It is a Snap ML use case. +A companion video can be found at Credit Card Fraud Detection by using Snap ML on IBM Cloud Pak for Data on IBM Z and IBM LinuxONE. +Summary +This IBM Redbooks® publication presented an overview of how IBM Cloud Pak for Data on IBM Z can modernize your data infrastructure; develop and deploy ML and AI models; and instantiate highly efficient analytics deployment on IBM LinuxONE. 
This publication demonstrated these tasks by guiding the reader through five common use cases where CP4D on IBM Z and IBM LinuxONE uses the different features that are supported on the platform, and showing how the associated features can help an enterprise to build AI and ML models with core transactional data, which results in a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation. +Authors +This publication was produced by a team of specialists from around the world working with the IBM Redbooks team: +Jasmeet Bhatia is an AI on IBM Z Product Manager who supports CP4D on IBM Z. She has 2.5 years of combined experience as a data scientist and a product manager. Jasmeet lives in San Francisco, California and holds a Bachelor of Arts degree in Data Science. She is working on her Master of Science degree in Data Science. Her area of expertise includes AI, data science, and product management. +Ravi Gummadi is a Technical Leader for CP4D on Linux on IBM Z and IBM LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software for various platforms, including IBM Z and IBM LinuxONE. He holds a master's degree in computer science and engineering from the Indian Institute of Technology Madras (IIT Madras). His areas of expertise include compilers, virtualization, big data analytics, containers, data, and AI, with a special focus on open-source ecosystems. +Chandra Shekhar Reddy Potula is a Lead AI on zSystems team Architect for Linux on IBM Z and LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software and firmware for various platforms, including IBM Z and LinuxONE. He holds a degree in computer science of engineering from Jawaharlal Nehru Technological University (JNTU). 
His areas of expertise include networking, virtualization, containers, data, and AI, with a special focus on open-source ecosystems. +Srirama Sharma is a Lead Technical Architect for IBM Cloud Pak, IBM Instana®, IBM Turbonomic®, and Red Hat Advanced Cluster Management for Kubernetes (RHACM) on IBM Z and LinuxONE. He has 18+ years of experience in UNIX and Linux application and device driver development. He designs ISV solutions on IBM Systems and IBM Blockchain®. He also works on cloud-native adoption of enterprise solutions on IBM Z and LinuxONE. Srirama holds a Bachelor of Engineering degree in computer science from Visvesvaraya Technological University (VTU). He lives in Bangalore, Karnataka. His areas of expertise include UNIX and Linux systems programming, virtualization, performance benchmarking of Financial Services Sector (FSS) industry solutions, open-source ecosystems, server infrastructure, and cloud-native adoption and modernization. +Thanks to the following people for their contributions to this project: +Lydia Parziale, Project Manager IBM Redbooks, Poughkeepsie Center +Shin Kelly Yang, AI on IBM Z Product Management IBM US +Tom Ramey, Anna Shugol, Andrew Sica, Jonathan Sloan, Elpida Tzortzatos, Meeta Vouk, IBM +Now you can become a published author, too! +Here's an opportunity to spotlight your skills, grow your career, and become a published author—all at the same time! Join an IBM Redbooks residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base. 
+Find out more about the residency program, browse the residency index, and apply online at: +ibm.com/redbooks/residencies.html +Stay connected to IBM Redbooks +GLYPH Find us on LinkedIn: +http://www.linkedin.com/groups?home=&gid=2130806 +GLYPH Explore new Redbooks publications, residencies, and workshops with the IBM Redbooks weekly newsletter: +https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm +GLYPH Stay current on recent Redbooks publications with RSS Feeds: +http://www.redbooks.ibm.com/rss.html +Notices +This information was developed for products and services offered in the US. This material might be available from IBM in other languages. However, you may be required to own a copy of the product or product version in that language in order to access it. +IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service. +IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. 
You can send license inquiries, in writing, to: +IBM Director of Licensing, IBM Corporation, North Castle Drive, MD-NC119, Armonk, NY 10504-1785, US +INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some jurisdictions do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you. +This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice. +Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk. +IBM may use or distribute any of the information you provide in any way it believes appropriate without incurring any obligation to you. +The performance data and client examples cited are presented for illustrative purposes only. Actual performance results may vary depending on specific configurations and operating conditions. +Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products. 
+Statements regarding IBM's future direction or intent are subject to change or withdrawal without notice, and represent goals and objectives only. +This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to actual people or business enterprises is entirely coincidental. +COPYRIGHT LICENSE: +This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs. The sample programs are provided "AS IS", without warranty of any kind. IBM shall not be liable for any damages arising out of your use of the sample programs. +Trademarks +IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation, registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the web at "Copyright and trademark information" at http://www.ibm.com/legal/copytrade.shtml +The following terms are trademarks or registered trademarks of International Business Machines Corporation, and might also be trademarks or registered trademarks in other countries. 
+ + +Db2® IBM® IBM Watson® Redbooks (logo)® Turbonomic® +IBM z16™ +IBM Blockchain® Instana® WebSphere® +IBM Cloud® IBM Cloud Pak® Open Liberty® z/OS® +OpenPages® z16™ +IBM Telum™ Redbooks® +
+The following terms are trademarks of other companies: +Intel, Intel logo, Intel Inside logo, and Intel Centrino logo are trademarks or registered trademarks of Intel Corporation or its subsidiaries in the United States and other countries. +The registered trademark Linux® is used pursuant to a sublicense from the Linux Foundation, the exclusive licensee of Linus Torvalds, owner of the mark on a worldwide basis. +Red Hat and OpenShift are trademarks or registered trademarks of Red Hat, Inc. or its subsidiaries in the United States and other countries. +UNIX is a registered trademark of The Open Group in the United States and other countries. +Other company, product, or service names may be trademarks or service marks of others. +
+ +
+Back cover +REDP-5695-00 +ISBN 0738461067 +Printed in U.S.A. +
+ +
+
\ No newline at end of file diff --git a/tests/data/redp5695.json b/tests/data/redp5695.json index 03c06912..7cc69605 100644 --- a/tests/data/redp5695.json +++ b/tests/data/redp5695.json @@ -1 +1 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "redp5695.pdf", "filename-prov": null, "document-hash": "a03aa4721c6532a8bab8a84cd8fdf579b5d91b92e5e5bbf63552b451c1b1ad7e", "#-pages": 40, "collection-name": null, "description": null, "page-hashes": [{"hash": "2c6aa6caf31aededa105d495c308dfbbb82f36e74a5c918aef77cb55e270512c", "model": "default", "page": 1}, {"hash": "4c44677e63816427d586e3351fca1f60791742ff03d4e4fbd9e20049f0df1e7c", "model": "default", "page": 2}, {"hash": "9b14b4b39f406833be1be7b8a4267fbc0c9e3fb38d9c0d6be9233aaa7b34a290", "model": "default", "page": 3}, {"hash": "b393eac931d491dfb5754007d81c5eb852837b767e6bbf97532fdd9797089c84", "model": "default", "page": 4}, {"hash": "069d8724992bd8bcf023444a735733831db317258922be9bf9e68b6cb71592b3", "model": "default", "page": 5}, {"hash": "50142dc112c6479b6aa9b337444bf76fab1788845c852372d680663fac7bb708", "model": "default", "page": 6}, {"hash": "dd036a3882859d0dca411354baf6f8d89fecaa39938e0336366cdb0e70aea878", "model": "default", "page": 7}, {"hash": "b6ff3d96c10d8bb147a1678f23b1b3dc0a57314ac67de6dc51c7fed42235b8fb", "model": "default", "page": 8}, {"hash": "a9f5f13f5b38d6e5f144870e072b9b5dcda3e9efb2612d12f2f78844a2af5faf", "model": "default", "page": 9}, {"hash": "2ee65c9e3ecaf2ee28d154184562131c2c0d46fde9536daf2bf63f27cd1593fb", "model": "default", "page": 10}, {"hash": 
"7c3211d92edf78fb8fc2a25419e905c54df6cf08e46a6ba3af97c6ecb6f42976", "model": "default", "page": 11}, {"hash": "e9d2d2c16961c78c4252759f518e49d67e42323770ea954f0fdb5845060255b4", "model": "default", "page": 12}, {"hash": "96ec46a4b8a06f1e4a0b8ab372f0fa2eb9fa3651d598777108e2c8d841504c01", "model": "default", "page": 13}, {"hash": "85666d6d7492d1aa5e2d680aad82b243f74ac9a88d12a822f1e23a4015653280", "model": "default", "page": 14}, {"hash": "996b6243f71e4b579c257f5a8c13120d756c35db892bb3978a030b3c0244b332", "model": "default", "page": 15}, {"hash": "8d3962d8d62baba81d7c1f9136148614f19ce3e67856165e11746a4eea0ba0e2", "model": "default", "page": 16}, {"hash": "c97edc5fbc0c2ebe67d17aaa3a35994c946e63370e43599ff1a53a43cb015f4b", "model": "default", "page": 17}, {"hash": "027b2018dd204fdcfde1f23714efb239c7b0ddec82e6dcb261917047112c59c1", "model": "default", "page": 18}, {"hash": "bfd48e4d4c91d25d1ce8b96cb8b893121083e34366f2336ec2297ad429aefba5", "model": "default", "page": 19}, {"hash": "369aa7281999b4927d03fc80d04f64c4c466672e988a345b8e38b0b413d8b061", "model": "default", "page": 20}, {"hash": "07156cdcdb8bed82cd7def82ae50e3696797733c1af054e52e085d6dc4f158f3", "model": "default", "page": 21}, {"hash": "6868b382cf5f556a700cbfbd3b5019d1874f4d0b539322cf2d66d3ae91750021", "model": "default", "page": 22}, {"hash": "d18c7c0751a2b21cd90b28db97f792b76d800ef27df66b3b7551f30ae8f3c135", "model": "default", "page": 23}, {"hash": "f6bafed831071a1e8cc789bd6dc193c05982a2d11675458e0e470c69e09c39eb", "model": "default", "page": 24}, {"hash": "acd7cf74cdc0fb0f0fa69c5f5d5882fae44ead0f3e98317d0f1ca28a8bbfa0e5", "model": "default", "page": 25}, {"hash": "8618c32aa8f279cc7cfa3df0ffd5eb3f3f54d7216ae3c1af792a4cb778067f0e", "model": "default", "page": 26}, {"hash": "a158cfc6005ac6ec5857112db18ccbab469a42558439bf7c3e2ff5d63894cab4", "model": "default", "page": 27}, {"hash": "d93efe59e4c22d9f511c0df70a9ec01f5a030934af7b9e6f5232db806b143152", "model": "default", "page": 28}, {"hash": 
"27d5b0915a207c64ea3dfb11866a19a1ef42e54840493113126c243c4be9bd89", "model": "default", "page": 29}, {"hash": "dfd1705fa6a5b13fef569bb8a9dd567ed3950bb91004431c85bd6499135e0d98", "model": "default", "page": 30}, {"hash": "74a78ed4bb5999301ec12e980789a33cabea9131a0a8f03899a49655b7196ea4", "model": "default", "page": 31}, {"hash": "f38e9710dee1ada6c040bdf1d6b33373780790d53391dde1bdf13ba3a24237cc", "model": "default", "page": 32}, {"hash": "4c45d9a942e9431722d4e657e1772e30ed322e4c48aa8fa8e0f582cab686694f", "model": "default", "page": 33}, {"hash": "d5d0bd616da3d60f4162c3bf0c136e6fb6a3776a09daac01676eaf4f194ddbc8", "model": "default", "page": 34}, {"hash": "e68ecc6baff9b98afcefa647a55aa34f9a9fe9f507a393177097d89c5faf1901", "model": "default", "page": 35}, {"hash": "806561bd028d61b13a1d840ef5aa6bc29f596670f6a755cdff7f5ca16156bc6c", "model": "default", "page": 36}, {"hash": "d3b094a0a238bbd0866053a416a69e04d4d8efb937be01a022db3ccb24808e29", "model": "default", "page": 37}, {"hash": "af95f582613321de443a381441d8c361f4ff94fc0f777736e939927cc7d9963f", "model": "default", "page": 38}, {"hash": "28b15af143c0cf1810e8552ebaf1c58b1597cd4bb772e981b5979d0e83c98d0e", "model": "default", "page": 39}, {"hash": "b98218d5619db175ad1a9a2424365094f52356cbefadce85e557ebb182151d82", "model": "default", "page": 40}]}, "main-text": [{"text": "Front cover", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [285.9599914550781, 760.5719604492188, 417.8999938964844, 782.77197265625], "page": 1, "span": [0, 11], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"text": "IBM Cloud Pak for Data on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [44.81999969482422, 595.87158203125, 535.7647094726562, 683.3976440429688], "page": 1, "span": [0, 31], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/1"}, {"text": "Redguide", "type": "page-footer", "name": 
"Page-footer", "font": null, "prov": [{"bbox": [467.2799987792969, 23.920970916748047, 571.5428466796875, 50.99158477783203], "page": 1, "span": [0, 8], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"text": "Executive overview", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 511.6919860839844, 292.852783203125, 534.8515014648438], "page": 3, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "Most industries are susceptible to fraud, which poses a risk to both businesses and consumers. According to The National Health Care Anti-Fraud Association, health care fraud alone causes the nation around $68 billion annually.$^{1}$ This statistic does not include the numerous other industries where fraudulent activities occur daily. In addition, the growing amount of data that enterprises own makes it difficult for them to detect fraud. Businesses can benefit by using an analytical platform to fully integrate their data with artificial intelligence (AI) technology.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.93536376953125, 393.2442932128906, 547.2804565429688, 476.23895263671875], "page": 3, "span": [0, 573], "__ref_s3_data": null}]}, {"text": "With IBM Cloud Pakfi for Data on IBM Z, enterprises can modernize their data infrastructure, develop, and deploy machine learning (ML) and AI models, and instantiate highly efficient analytics deployment on IBM LinuxONE. Enterprises can create cutting-edge, intelligent, and interactive applications with embedded AI, colocate data with commercial applications, and use AI to make inferences.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.92813110351562, 323.9799499511719, 547.349853515625, 382.1717224121094], "page": 3, "span": [0, 392], "__ref_s3_data": null}]}, {"text": "This IBM Redguide publication presents a high-level overview of IBM Z. 
It describes IBM Cloud Pak for Data (CP4D) on IBM Z and IBM LinuxONE, the different features that are supported on the platform, and how the associated features can help enterprise customers in building AI and ML models by using core transactional data, which results in decreased latency and increased throughput.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94898986816406, 253.30953979492188, 547.2882690429688, 312.1575622558594], "page": 3, "span": [0, 385], "__ref_s3_data": null}]}, {"text": "This publication highlights real-time CP4D on IBM Z use cases. Real-time Clearing and Settlement Transactions, Trustworthy AI and its Role in Day-To-Day Monitoring, and the Prevention of Retail Crimes are use cases that are described in this publication. Using CP4D on IBM Z and LinuxONE, this publication shows how businesses can implement a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8448028564453, 171.74127197265625, 547.2760009765625, 242.12237548828125], "page": 3, "span": [0, 503], "__ref_s3_data": null}]}, {"text": "$^{1 }$https://www.bcbsm.com/health-care-fraud/fraud-statistics.html", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [136.8000030517578, 56.842594146728516, 387.7856140136719, 66.6188735961914], "page": 3, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 
2023.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.84707260131836, 27.84148406982422, 180.32760620117188, 37.38951110839844], "page": 3, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "1", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.6041259765625, 27.93828010559082, 547.2176513671875, 37.469120025634766], "page": 3, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "IBM Z: An overview", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.75005340576172, 706.0162963867188, 212.3214874267578, 721.6939086914062], "page": 4, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "Ever wonder how many transactions a bank processes per day? What about the pace at which these transactions happen? According to an IBMfi report, 44 of 50 of the world's top banks use IBM Z mainframes for these daily transactions.$^{2}$ IBM Z is a platform that is designed for voluminous data, maximum security, real-time transaction analysis, and cost efficiency.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.85592651367188, 630.8654174804688, 539.5514526367188, 689.4552612304688], "page": 4, "span": [0, 365], "__ref_s3_data": null}]}, {"text": "The most recent platform for IBM Z is IBM z16\u2122. 
The IBM z16 supports the following features:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8168182373047, 597.2792358398438, 515.6898803710938, 619.3519287109375], "page": 4, "span": [0, 92], "__ref_s3_data": null}]}, {"text": "GLYPH On-chip AI acceleration", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.85256958007812, 580.2994384765625, 255.07154846191406, 590.5203857421875], "page": 4, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "GLYPH Quantum-safe crypto discovery", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.62339782714844, 562.92626953125, 289.7875671386719, 573.06884765625], "page": 4, "span": [0, 45], "__ref_s3_data": null}]}, {"text": "GLYPH Simplified compliance", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.72850036621094, 546.2800903320312, 247.85403442382812, 556.7696533203125], "page": 4, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "GLYPH Flexible capacity", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6919708251953, 528.5509643554688, 225.09173583984375, 539.1024169921875], "page": 4, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "GLYPH Modernization of applications", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60623168945312, 511.6312561035156, 280.60699462890625, 521.9880981445312], "page": 4, "span": [0, 45], "__ref_s3_data": null}]}, {"text": "GLYPH Sustainability", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6087646484375, 494.7903137207031, 210.2745361328125, 505.39013671875], "page": 4, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "With these features, enterprises can upgrade applications while preserving secure and resilient data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.90225219726562, 461.2615661621094, 521.9436645507812, 483.12371826171875], 
"page": 4, "span": [0, 101], "__ref_s3_data": null}]}, {"text": "To learn more about these features, see the IBM z16 product page.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9076690673828, 439.1145324707031, 434.5896301269531, 449.38116455078125], "page": 4, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "Figure 1 on page 3 shows a picture of the IBM z16 mainframe.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.52610778808594, 417.182861328125, 415.693603515625, 427.33294677734375], "page": 4, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "$^{2 }$https://www.ibm.com/case-studies/bankwest/", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [136.1376495361328, 57.05204391479492, 311.9372253417969, 67.31904602050781], "page": 4, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "2", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.08910369873047, 27.93828010559082, 72.8219985961914, 37.42863464355469], "page": 4, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.64891815185547, 27.621387481689453, 261.53851318359375, 37.20967102050781], "page": 4, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 1 IBM z16", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.36338806152344, 333.7419738769531, 211.10719299316406, 343.4512634277344], "page": 5, "span": [0, 16], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/3"}, {"text": "IBM z16 and IBM LinuxONE Emperor 4 features", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.56404876708984, 301.7572937011719, 355.6016540527344, 314.76092529296875], "page": 5, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "IBM Z are based on enterprise mainframe technology. 
Starting with transaction-based workloads and databases, IBM Z has undergone tremendous transformations in its system design for many generations to build servers that cater to Linux-based workloads and security with a cyberresilient system, and support quantum computing and modernization by using a hybrid cloud with a focus on data and AI.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.82955932617188, 230.37913513183594, 547.1771240234375, 288.7164306640625], "page": 5, "span": [0, 394], "__ref_s3_data": null}]}, {"text": "3", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.1922607421875, 27.93828010559082, 547.2176513671875, 37.54865264892578], "page": 5, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "4", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.48660278320312, 27.93828010559082, 72.8219985961914, 37.547672271728516], "page": 6, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.67277526855469, 27.705074310302734, 261.53851318359375, 37.23814392089844], "page": 6, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 2 provides a snapshot of the IBM Z processor roadmap, which depicts the journey of transformation and improvement.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.30259704589844, 699.2496337890625, 543.5195922851562, 721.4502563476562], "page": 6, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "Figure 2 IBM Z: Processor roadmap", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.45831298828125, 403.8142395019531, 213.13937377929688, 413.22802734375], "page": 6, "span": [0, 33], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"text": "The IBM z16 and IBM LinuxONE Emperor 4 are the latest of the IBM Z, and they are 
developed with a 'built to build' focus to provide a powerful, cyberresilient, open, and secure platform for business with an extra focus on sustainability to help build sustainable data centers. Although the z16 server can host both IBM z/OSfi and Linux workloads, LinuxONE Emperor 4 is built to host Linux only workloads with a focus on consolidation and resiliency. Depending on the workload, consolidation from numerous x86 servers into a LinuxONE Emperor 4 can help reduce energy consumption by 75% and data center floor space by 50%, which helps to achieve the sustainability goals of the organization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9113311767578, 297.0347900390625, 547.256591796875, 391.550048828125], "page": 6, "span": [0, 689], "__ref_s3_data": null}]}, {"text": "Figure 3 on page 5 shows a summary of the system design of IBM LinuxONE Emperor 4 with the IBM Telum\u2122 processor. The IBM Telum processor chip is designed to run enterprise applications efficiently where their data resides to embed AI with super low latency. The support for higher bandwidth and I/O rates is supported through FCP Express cards with an endpoint security solution. The memory subsystem supports up to 40 TB of memory.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.12657165527344, 226.86495971679688, 547.257568359375, 285.4850769042969], "page": 6, "span": [0, 432], "__ref_s3_data": null}]}, {"text": "Figure 3 System design of IBM z16 LinuxONE Emperor 4", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.32421112060547, 430.4823913574219, 297.8570861816406, 439.8866882324219], "page": 7, "span": [0, 52], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"text": "The IBM z16 and IBM LinuxONE Emperor 4 servers are built with 7-nm technology at a 5.2 GHz speed. 
They consist of four dual-chip modules (DCMs) per central processor complex (CPC) drawer, each of which is built with two 8-core Telum processor chips that has \"first in the industry\" on-chip acceleration for mid-transaction, real-time AI inferencing, which supports many different use cases, including fraud detection.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7873992919922, 359.77313232421875, 547.2974243164062, 417.9614562988281], "page": 7, "span": [0, 417], "__ref_s3_data": null}]}, {"text": "Each core has access to a huge private 32 MB L2 cache where up to 16 MB of the L2 cache of an inactive core can be used as virtual cache (L3 / L4) by neighboring active cores on the chip. This cache helps address translation and access checking by prefetching the same virtual cache into the L2 cache. The virtual cache also includes Neural Network Processing Assist instructions and direct memory access with protection, and per chip GZIP compression.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89752197265625, 277.57452392578125, 547.322265625, 348.19305419921875], "page": 7, "span": [0, 452], "__ref_s3_data": null}]}, {"text": "5", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.3024291992188, 27.93828010559082, 547.2176513671875, 37.632568359375], "page": 7, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "6", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.51323699951172, 27.93828010559082, 72.8219985961914, 37.4283332824707], "page": 8, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.71185302734375, 27.736722946166992, 261.53851318359375, 37.20823669433594], "page": 8, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 4 provides more information about the features of AI Accelerator integration with 
the IBM Z processor cores.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.292236328125, 698.827880859375, 541.310546875, 721.4586791992188], "page": 8, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.19121551513672, 418.4478454589844, 387.3546142578125, 428.0519714355469], "page": 8, "span": [0, 78], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"text": "The IBM z16 and IBM LinuxONE Emperor 4 server platforms are built with the hardware features that are shown in Figure 4 with addressing data and AI workloads in mind. Regardless of where the ML and deep learning (DL) frameworks are used to build and train data and AI models, the inferencing on existing enterprise application data can happen along currently running enterprise business applications. CP4D 4.6 supports Tensorflow and IBM Snap ML frameworks, which are optimized to use the on-chip AI Accelerator during inferencing. 
Support for various other frameworks is planned for future releases.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91659545898438, 323.6279296875, 547.2345581054688, 406.1714172363281], "page": 8, "span": [0, 600], "__ref_s3_data": null}]}, {"text": "Figure 5 on page 7 shows the seamless integration of AI into existing enterprises workloads on the IBM z16 while leveraging the underlying hardware capabilities.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.17147827148438, 289.6532897949219, 544.6222534179688, 311.9052734375], "page": 8, "span": [0, 161], "__ref_s3_data": null}]}, {"text": "Figure 5 Seamless integration", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.43448638916016, 481.01043701171875, 189.449951171875, 490.2646179199219], "page": 9, "span": [0, 29], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"text": "What is Cloud Pak for Data on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.33121490478516, 438.1763000488281, 341.5532531738281, 453.7354431152344], "page": 9, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data allows enterprises to simplify, unify, and automate the delivery of data and AI. It categorizes the activities within the journey to AI as four rungs of the AI Ladder: Collect, Organize, Analyze, and Infuse. For more information about each of the AI Ladder rungs, see Become Data Driven with IBM Z Infused Data Fabric , REDP-5680.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0311737060547, 374.9359130859375, 547.132080078125, 421.4658508300781], "page": 9, "span": [0, 353], "__ref_s3_data": null}]}, {"text": "CP4D on IBM Z provides enterprises with a resilient and secure private cloud platform. You can use it to create ML and AI models that may be included into modern intelligent applications. 
You also can use it to use and construct applications for mission-critical data. With CP4D on IBM Z, enterprises can lower data movement latency, cost inefficiencies, and potential security exposures. Enterprises can safely store and access their most important company data, and leverage their current infrastructure by using cutting-edge hybrid cloud applications. Enterprises can combine their current database applications without any rewrites, which results in reduced cost and complexity. Lastly, by using CP4D on IBM Z, enterprises can update their database infrastructure to benefit from easier management, a quicker time to value, and lower operating expenses.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.93280029296875, 244.90695190429688, 545.086181640625, 363.6493225097656], "page": 9, "span": [0, 857], "__ref_s3_data": null}]}, {"text": "7", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.0161743164062, 27.93828010559082, 547.2838745117188, 37.731361389160156], "page": 9, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "8", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.47938537597656, 27.93828010559082, 72.8219985961914, 37.48422622680664], "page": 10, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [87.7466049194336, 27.745710372924805, 261.53851318359375, 37.175758361816406], "page": 10, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 6 shows a solution overview of CP4D. The infrastructure alternatives are shown at the bottom, and they include IBM Z and LinuxONE. They all leverage Red Hat OpenShift. Common Foundational Services come next, which offer clarity throughout the data and AI lifecycle, that is, from user access management to monitoring and service provisioning. 
A high-level view of the services is shown in the middle section. The services have several different capabilities that span the AI hierarchy. The platform can be expanded, and it offers a seamless user experience for all distinct personas across the AI lifecycle, from data gathering through AI infusion.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06129455566406, 626.6174926757812, 547.2805786132812, 721.595947265625], "page": 10, "span": [0, 655], "__ref_s3_data": null}]}, {"text": "Figure 6 Solution overview of Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.57273864746094, 298.9583435058594, 264.11474609375, 308.77056884765625], "page": 10, "span": [0, 48], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"text": "We highlight the four main pillars that make IBM Z the correct infrastructure for CP4D:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.52293395996094, 276.2957763671875, 518.3954467773438, 286.7032775878906], "page": 10, "span": [0, 87], "__ref_s3_data": null}]}, {"text": "GLYPH Performance and Scale", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.77655029296875, 259.4186096191406, 255.66061401367188, 269.49017333984375], "page": 10, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "GLYPH Embedded Accelerators", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.59234619140625, 242.43878173828125, 257.89263916015625, 252.5904083251953], "page": 10, "span": [0, 37], "__ref_s3_data": null}]}, {"text": "GLYPH Reliability and Availability", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.40530395507812, 224.8186492919922, 263.65850830078125, 235.10702514648438], "page": 10, "span": [0, 44], "__ref_s3_data": null}]}, {"text": "GLYPH Security and Governance.", "type": "paragraph", "name": "List-item", "font": null, 
"prov": [{"bbox": [135.4591064453125, 208.41940307617188, 269.5468444824219, 218.50930786132812], "page": 10, "span": [0, 40], "__ref_s3_data": null}]}, {"text": "From a performance perspective, CP4D on IBM Z provides your data and AI with high transaction processing and a powerful infrastructure. From the embedded accelerators perspective, CP4D on IBM Z can investigate each transaction thanks to a cutting-edge DL inference technology even in the most demanding, sensitive, and latency-prone real-time workloads. From a reliability perspective, CP4D on IBM Z provides high availability and resiliency. Lastly from the security perspective, CP4D on IBM Z is suitable for protecting sensitive data and AI models for enterprises in highly regulated industries or those industries that are worried about security.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9900665283203, 102.40120697021484, 547.2814331054688, 196.7720184326172], "page": 10, "span": [0, 650], "__ref_s3_data": null}]}, {"text": "Cloud Pak for Data capabilities on IBM Z and IBM LinuxONE", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 705.8773193359375, 519.7557983398438, 721.6634521484375], "page": 11, "span": [0, 57], "__ref_s3_data": null}]}, {"text": "With CP4D on IBM Z and IBM LinuxONE, users can develop, train, and deploy AI and ML models. Users can accomplish this task by using the CP4D IBM Watsonfi Studio and IBM Watson Machine Learning (WLM) services. 
By using these two fundamental services, users can accomplish the following tasks:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.6363067626953, 642.8818969726562, 544.5404052734375, 689.4312133789062], "page": 11, "span": [0, 291], "__ref_s3_data": null}]}, {"text": "GLYPH Provision various containerized databases.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7655487060547, 626.25927734375, 341.2439270019531, 636.0855102539062], "page": 11, "span": [0, 58], "__ref_s3_data": null}]}, {"text": "GLYPH Explore, clean, shape, and alter data by using Data Refinery.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.609619140625, 608.9733276367188, 423.5125427246094, 619.2939453125], "page": 11, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "GLYPH Use project-specific data that is uploaded, or connect to distant data.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.44422912597656, 592.2996826171875, 454.5639343261719, 602.7785034179688], "page": 11, "span": [0, 87], "__ref_s3_data": null}]}, {"text": "GLYPH Create Spark run times and applications.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.59793090820312, 575.0346069335938, 331.7221984863281, 585.4097900390625], "page": 11, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "GLYPH Create, build, evaluate, and deploy analytics and ML models with trust and transparency.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.42922973632812, 558.2803344726562, 544.107177734375, 568.422607421875], "page": 11, "span": [0, 104], "__ref_s3_data": null}]}, {"text": "GLYPH Leverage the AI Integrated Accelerator for TensorFlow 2.7.2 and Snap ML 1.9.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.64759826660156, 541.0048217773438, 499.1278381347656, 551.3433227539062], "page": 11, "span": [0, 92], 
"__ref_s3_data": null}]}, {"text": "For more information about the specifics of these capabilities, see Capabilities on Linux on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.32513427734375, 507.2811279296875, 538.98681640625, 529.5091552734375], "page": 11, "span": [0, 116], "__ref_s3_data": null}]}, {"text": "Open-source ecosystem", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 463.8086853027344, 250.52972412109375, 479.4306945800781], "page": 11, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "These days, innovation and product development are not limited to closed doors within an organization. In any industry sector, the solutions include a mix of proprietary code addressing the core business solution that is supported or integrated into other software components from open source. In some cases, enterprises business solutions also are built from open-source community offerings. Thus, open-source software becomes an important ingredient in modern-day solution building.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8963165283203, 376.8506164550781, 543.4259643554688, 447.3017272949219], "page": 11, "span": [0, 484], "__ref_s3_data": null}]}, {"text": "IBM actively participates in various open-source communities as part of steering boards defining the roadmap of the community, and also in contributing code to make the community a better place for everyone to participate. Red Hat also actively participates in various open-source communities and makes extensive contributions. In open-source communities, although most open-source development happens on x86 / amd64 or the Intel architecture, the same open-source software is used by other architectures, such as IBM Power (ppc64le), IBM Z and IBM LInuxONE (s390x), ARM, and Sparc. 
So, the availability of an open-source ecosystem on any architecture is key and critical to business.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9189453125, 271.0766906738281, 547.2396850585938, 365.0788269042969], "page": 11, "span": [0, 684], "__ref_s3_data": null}]}, {"text": "On IBM Z and IBM LinuxONE (s390x) architecture, there is a huge open-source support ecosystem that ranges from operating systems such as Linux; application run times; cloud and container services; DevOps and automation; big data; observability; analytics; databases; and storage. The ecosystem on IBM Z and IBM LinuxONE is growing.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0531463623047, 213.07455444335938, 537.3534545898438, 259.5094299316406], "page": 11, "span": [0, 331], "__ref_s3_data": null}]}, {"text": "IBM Z and IBM LinuxONE include much open-source software in their ecosystem. You can see the growing list of open-source software for IBM Z and LinuxONE at The Growing Ecosystem of Open-Source Software for IBM Z and LinuxONE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2856903076172, 167.24769592285156, 540.7626342773438, 201.58775329589844], "page": 11, "span": [0, 225], "__ref_s3_data": null}]}, {"text": "IBM Z and IBM LinuxONE are available to various communities to include support for s390x builds as part of their community's continuous integration and continuous delivery (CI/CD). 
Also, for open-source community developers, infrastructure resources are available on a no-charge basis through the IBM LinuxONE community cloud.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05987548828125, 108.99290466308594, 544.6069946289062, 155.43284606933594], "page": 11, "span": [0, 326], "__ref_s3_data": null}]}, {"text": "9", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [541.1886596679688, 27.93828010559082, 547.2176513671875, 37.77804183959961], "page": 11, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "10", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.68538665771484, 27.93828010559082, 78.4020004272461, 37.55242919921875], "page": 12, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.686418533325195, 267.07440185546875, 37.19207000732422], "page": 12, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "CP4D includes a mix of open-source and proprietary data and AI runtime databases; open-source run times like Python; open-source data platforms like Anaconda; ML and DL frameworks like Pytorch and Tensorflow; and thousands of reusable Python packages. 
All of them are available and supported on s390x architecture to provide seamless parity with x86 architecture and a seamless experience for enterprise data scientists, architects, and data and AI solution developers on IBM Z and IBM LinuxONE platforms.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9039306640625, 651.1361083984375, 547.312255859375, 721.5728759765625], "page": 12, "span": [0, 505], "__ref_s3_data": null}]}, {"text": "Anaconda is one of the open-source data platforms that provide Python and R based data science ML frameworks; analytics and data visualization tools; and open-source data science tools and libraries like Conda, XGBoost, and SciKit-Learn. Anaconda runs natively on Linux on IBM Z and IBM LinuxONE, and on IBM z/OS Container Extensions (zcX) on z/OS. For more information, see Announcing Anaconda for Linux on IBM Z and LinuxONE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9044647216797, 581.2603759765625, 547.3501586914062, 639.3648681640625], "page": 12, "span": [0, 427], "__ref_s3_data": null}]}, {"text": "In addition to strong, open-source ecosystem support for application development on Linux and enterprise operating systems, a new generation of IBM Z and IBM LinuxONE servers (IBM z16\u2122) also have strong platform support, and AI acceleration capabilities that can be leveraged by open-source software to perform better on the server infrastructure. 
For example, the recently released CP4D 4.6 has Tensorflow and IBM SnapML frameworks that leverage the AI accelerators when running on an IBM z16 server.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.92164611816406, 498.8492736816406, 546.230712890625, 569.2470703125], "page": 12, "span": [0, 501], "__ref_s3_data": null}]}, {"text": "So, to summarize, there is a huge, growing data and AI open source ecosystem that is supported and optimized on IBM Z and IBM LinuxONE servers.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.26947021484375, 465.1493225097656, 521.3436889648438, 487.5268249511719], "page": 12, "span": [0, 143], "__ref_s3_data": null}]}, {"text": "Why AI on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.2679214477539, 421.844482421875, 191.19384765625, 437.6422119140625], "page": 12, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "Data and AI playing a major role in the modernization story to enable the digital transformation journey of every organization. Many organizations recognize the business value of infusing AI into their infrastructure. CP4D provides the cloud-native solution to put your data to work. With CP4D, all your data users can collaborate from a single, unified interface that supports many services that work together, including collecting data, organizing the data, analyzing the data, and infusing AI.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91458129882812, 334.8140563964844, 547.2586059570312, 405.332275390625], "page": 12, "span": [0, 496], "__ref_s3_data": null}]}, {"text": "Traditional ML models' power most of today's ML applications in business and among AI practitioners. CP4D supports traditional ML frameworks for training and inferencing, such as Scikit-learn, Snap ML, and XGBoost. 
Snap ML is a library that provides high-speed training and inferencing of ML models that leverage the AI accelerator while running on an IBM z16 (Linux on IBM Z). CP4D supports DL frameworks such as TensorFlow and PyTorch. TensorFlow is a DL framework that leverages the AI accelerator while running on an IBM z16 (Linux on IBM Z).", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89390563964844, 241.02479553222656, 547.2825317382812, 323.4716491699219], "page": 12, "span": [0, 546], "__ref_s3_data": null}]}, {"text": "Figure 7 on page 11 provides an overview of the components that are supported on CP4D on IBM Z. You can leverage Watson Studio for model building, training, and validation, and WML for deployment of the model. Eventually, applications can use the AI inference endpoint to score the model.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0904541015625, 183.28155517578125, 547.3233032226562, 229.29551696777344], "page": 12, "span": [0, 288], "__ref_s3_data": null}]}, {"text": "Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.48258972167969, 428.9914245605469, 506.38671875, 438.489013671875], "page": 13, "span": [0, 104], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"text": "In summary, here are some of the reasons why you should choose AI on IBM Z:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.08367919921875, 406.4902648925781, 492.9408264160156, 416.6065673828125], "page": 13, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "GLYPH World-class AI inference platform for enterprise workloads:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6901397705078, 389.77813720703125, 413.4001770019531, 399.8421936035156], "page": 13, "span": [0, 75], "__ref_s3_data": null}]}, 
{"text": "-Embedded accelerators: A centralized on-chip AI accelerator that is shared by all cores.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.4140167236328, 360.75909423828125, 526.1881103515625, 382.4400634765625], "page": 13, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "-Industry standard AI ecosystem: Many industry open-source data science frameworks are available on the platform.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.0751190185547, 331.77947998046875, 547.2465209960938, 353.9866638183594], "page": 13, "span": [0, 113], "__ref_s3_data": null}]}, {"text": "-Seamlessly integrate AI into existing enterprise workload stacks: Train anywhere, and then deploy on IBM Z.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.26991271972656, 302.2157287597656, 546.7576904296875, 324.52313232421875], "page": 13, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "GLYPH Security: Encrypted memory, and improved trusted execution environments.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60690307617188, 285.71148681640625, 490.0331726074219, 295.9700927734375], "page": 13, "span": [0, 88], "__ref_s3_data": null}]}, {"text": "GLYPH Sustainability: Reduce your energy consumption with real-time monitoring tools about the energy consumption of the system.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.39907836914062, 256.0257873535156, 547.2705078125, 278.7433776855469], "page": 13, "span": [0, 138], "__ref_s3_data": null}]}, {"text": "AI use cases", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.38404083251953, 213.536376953125, 161.7474365234375, 228.9129180908203], "page": 13, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "With billions of transactions per day in many of today's industries, it is key to get real-time insights about what is 
happening in your data. AI on the IBM Z stack understands these situations, and it delivers in-transaction inference in real time and at scale.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81288146972656, 162.75877380371094, 533.9012451171875, 196.6822052001953], "page": 13, "span": [0, 262], "__ref_s3_data": null}]}, {"text": "Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0598602294922, 104.60002136230469, 547.2466430664062, 150.73692321777344], "page": 13, "span": [0, 295], "__ref_s3_data": null}]}, {"text": "For insurance companies, a pressing use case would be claims processing. For markets and trading, clearing and settlement use cases are paramount.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.34182739257812, 70.34142303466797, 547.311279296875, 92.59844207763672], "page": 13, "span": [0, 146], "__ref_s3_data": null}]}, {"text": "11", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.570030212402344], "page": 13, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "12", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.70201873779297, 27.93828010559082, 78.4020004272461, 37.41368103027344], "page": 14, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.684078216552734, 267.07440185546875, 37.171226501464844], "page": 14, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "For the health care industry, medical image 
processing (such as MRIs and x-rays), skin cancer detection, and patient monitoring activities such as infant motion analysis, is important.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.13888549804688, 687.2786865234375, 525.1851196289062, 721.3170166015625], "page": 14, "span": [0, 184], "__ref_s3_data": null}]}, {"text": "For the airline industry, processes such as air traffic management, flight management systems, and flight maintenance predictions are use cases that are ideal candidates for using AI on IBM Z.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9561309814453, 641.2594604492188, 547.3113403320312, 675.2321166992188], "page": 14, "span": [0, 192], "__ref_s3_data": null}]}, {"text": "In the following sections, we describe the following use cases:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2395782470703, 619.1561279296875, 413.4231262207031, 629.3563842773438], "page": 14, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH \"Use case 1: Responsible AI augmented with risk and regulatory compliance\" on page 12 AI model lifecycle governance, risk management, and regulatory compliance are key to the success of the enterprises. It is imperative to adopt a typical AI model lifecycle to protect new end-to-end risks.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60646057128906, 561.1961059570312, 545.5465698242188, 611.9253540039062], "page": 14, "span": [0, 306], "__ref_s3_data": null}]}, {"text": "GLYPH \"Use case 2: Credit default risk assessment\" on page 22", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66265869140625, 544.2186279296875, 402.90234375, 554.5841674804688], "page": 14, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. 
Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.4883270263672, 490.5600280761719, 547.2406005859375, 537.6137084960938], "page": 14, "span": [0, 295], "__ref_s3_data": null}]}, {"text": "GLYPH \"Use case 3: Clearing and settlement\" on page 25", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56887817382812, 474.1841735839844, 371.8055114746094, 484.5502624511719], "page": 14, "span": [0, 64], "__ref_s3_data": null}]}, {"text": "The use of AI can help to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.5205841064453, 444.71429443359375, 541.1401977539062, 467.2469177246094], "page": 14, "span": [0, 151], "__ref_s3_data": null}]}, {"text": "GLYPH \"Use case 4: Remaining Useful Life of an aircraft engine\" on page 27 We describe how AI can help to avoid unplanned aircraft downtime by determining the remaining time or cycles that an aircraft engine is likely to operate before failure.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6227569580078, 398.9003601074219, 534.64013671875, 437.9700012207031], "page": 14, "span": [0, 254], "__ref_s3_data": null}]}, {"text": "GLYPH \"Use case 5: AI-powered video analytics on an infant's motions for health prediction\" on page 30", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4189910888672, 370.28338623046875, 539.6531372070312, 392.7353515625], "page": 14, "span": [0, 112], "__ref_s3_data": null}]}, {"text": "In this section, we describe how AI can predict an infant's health conditions by monitoring real-time body movements.", "type": "paragraph", "name": "List-item", "font": null, "prov": 
[{"bbox": [151.01174926757812, 340.8392639160156, 547.24267578125, 363.40240478515625], "page": 14, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "Use case 1: Responsible AI augmented with risk and regulatory compliance", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.68285369873047, 278.4156799316406, 547.2564697265625, 313.73541259765625], "page": 14, "span": [0, 72], "__ref_s3_data": null}]}, {"text": "Advancement in AI is changing the world, and organizations must adopt AI to embrace new challenges daily. Many enterprises see tremendous value in adopting AI and ML technologies while establishing organization trust in the models, underlying data, and the process to be followed. An AI model lifecycle can be a daunting task.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.70286560058594, 216.10757446289062, 547.1787719726562, 262.7936706542969], "page": 14, "span": [0, 326], "__ref_s3_data": null}]}, {"text": "How mature is your AI governance? 
In this section, we provide a use case demonstrating the trustworthiness of AI and its importance in daily monitoring.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.3160858154297, 181.8393096923828, 547.2424926757812, 204.10169982910156], "page": 14, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.45439910888672, 149.6572723388672, 186.71859741210938, 162.72799682617188], "page": 14, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "Here are the three main reasons why organizations struggle with the adoption of AI:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.2456512451172, 126.27838134765625, 508.98724365234375, 136.55023193359375], "page": 14, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "GLYPH Scaling with growing regulations", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.55795288085938, 109.18142700195312, 293.96282958984375, 119.35371398925781], "page": 14, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "GLYPH Lack of confidence in operationalized AI (making responsible AI)", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5261993408203, 91.55158233642578, 435.7274475097656, 102.10991668701172], "page": 14, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "GLYPH Challenges around managing the risk throughout the entire AI workflow", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4950714111328, 74.97781372070312, 466.1335144042969, 85.43976593017578], "page": 14, "span": [0, 85], "__ref_s3_data": null}]}, {"text": "Scaling with growing regulations", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.3238983154297, 709.180908203125, 324.71160888671875, 721.5367431640625], "page": 15, "span": [0, 32], "__ref_s3_data": null}]}, {"text": 
"Laws and regulations in the data and AI space are accelerating, and many countries are proposing strict AI policies. Countries are monitoring adherence of these policies by the enterprises and imposing fines for any violations. Responding to these regulations are challenging global organizations where multiple regulations apply. For enterprises, it is important to adopt AI policies when there is change, and to validate explainable models to protect against discrimination.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0789337158203, 636.2792358398438, 536.3155517578125, 706.6138916015625], "page": 15, "span": [0, 476], "__ref_s3_data": null}]}, {"text": "Responsible AI", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 608.0890502929688, 223.54685974121094, 620.142333984375], "page": 15, "span": [0, 14], "__ref_s3_data": null}]}, {"text": "Responsible AI protects against loss of data privacy, and reduced customer loyalty and trust. A data scientist cannot maximize accuracy and model performance above all other concerns. 
Practicing responsible AI is a best practice, and you must establish protection and validation to ensure that any models that are placed into production are fair and explainable.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01739501953125, 558.7826538085938, 547.3283081054688, 605.0657348632812], "page": 15, "span": [0, 362], "__ref_s3_data": null}]}, {"text": "Risks throughout the entire AI workflow", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.6778106689453, 531.367431640625, 364.1976623535156, 543.471435546875], "page": 15, "span": [0, 39], "__ref_s3_data": null}]}, {"text": "Organizations need to mitigate risk of the following items:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.38307189941406, 517.2207641601562, 389.47918701171875, 528.18359375], "page": 15, "span": [0, 59], "__ref_s3_data": null}]}, {"text": "GLYPH Deciding not to use certain technologies or practices", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.84344482421875, 501.0563659667969, 382.91455078125, 511.1645812988281], "page": 15, "span": [0, 69], "__ref_s3_data": null}]}, {"text": "GLYPH Using personal information when needed and with a user's consent", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61293029785156, 484.2521667480469, 450.990234375, 494.5205993652344], "page": 15, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "GLYPH Ensuring automated decisions are free from bias", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7186737060547, 467.2592468261719, 366.2126770019531, 477.1536865234375], "page": 15, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH Customer confidence by providing explanations for business decisions", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6738739013672, 450.26641845703125, 462.3146057128906, 
460.5806579589844], "page": 15, "span": [0, 84], "__ref_s3_data": null}]}, {"text": "GLYPH Fraud to the organization and to customer's accounts", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5981903076172, 433.1468200683594, 386.45635986328125, 443.06573486328125], "page": 15, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "GLYPH Delays in putting models into production", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56326293945312, 415.8823547363281, 331.2491149902344, 425.7867431640625], "page": 15, "span": [0, 56], "__ref_s3_data": null}]}, {"text": "In fact, in a recent survey, these concerns were echoed by real AI adopters when asked what aspects of trust are most important to them. Although explaining how AI decides is the primary concern, all of these concerns are important.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1046905517578, 370.3006286621094, 547.186767578125, 404.339111328125], "page": 15, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "The key point here is that risk exists throughout the entire AI lifecycle starting with the underlying data and the business justification behind the \"why\" of the project and continuing into production. Without a formalized process, there is no way to mitigate these risks to unlock the scale that is required to make automated decisions profitable. 
With these decisions, the business can operate proactively instead of reactively.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.93702697753906, 300.23199462890625, 547.24658203125, 358.55194091796875], "page": 15, "span": [0, 431], "__ref_s3_data": null}]}, {"text": "13", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.52984619140625], "page": 15, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "14", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.66812133789062, 27.93828010559082, 78.4020004272461, 37.513004302978516], "page": 16, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.6509952545166, 267.07440185546875, 37.20136260986328], "page": 16, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "For example, a business can start testing a model before production for fairness metrics. For this task, enterprises need an end-to-end workflow with approvals to mitigate these risks and increase the scale of AI investments, as shown in Figure 8, which presents a typical AI model lifecycle in an enterprise.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.99598693847656, 675.0350341796875, 547.3073120117188, 721.3980712890625], "page": 16, "span": [0, 309], "__ref_s3_data": null}]}, {"text": "Figure 8 Typical AI model lifecycle", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.23173522949219, 450.5171203613281, 206.54298400878906, 459.9090881347656], "page": 16, "span": [0, 35], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"text": "Due to regulations, more stakeholders adopt the typical AI model lifecycle to protect their brand from new end-to-end risks. 
To ensure various aspects of both regulatory compliance and security, the personas that must be involved include the chief financial officer (CFO), chief marketing officer (CMO), chief data officer (CDO), HR, and chief regulatory officer (CRO), along with the data engineers, data scientists, and business analysts, who build AI workflows.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95455932617188, 367.7792053222656, 540.1202392578125, 437.9422607421875], "page": 16, "span": [0, 464], "__ref_s3_data": null}]}, {"text": "IBM governance solution for IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.70768737792969, 335.0872802734375, 279.20489501953125, 348.1524353027344], "page": 16, "span": [0, 33], "__ref_s3_data": null}]}, {"text": "AI model lifecycle governance, risk management, and regulatory compliance are key to the success of enterprises.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.95175170898438, 299.73846435546875, 540.66015625, 322.0162048339844], "page": 16, "span": [0, 112], "__ref_s3_data": null}]}, {"text": "AI governance is a comprehensive framework that uses a set of automated processes, methodologies, and tools to manage an organization's use of AI. Consistent principles guiding the design, development, deployment, and monitoring of models are critical in driving responsible and trustworthy AI. AI governance includes processes that trace and record the origin of data, models (including associated metadata), and pipelines for audits. The details of entry should include the techniques that trained each model, the hyperparameters that were used, and the metrics from testing phases. 
These details provide increased transparency into the model's behavior throughout the lifecycle, the data that was influential in its development, and the possible risks.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83395385742188, 181.78021240234375, 547.3551025390625, 288.1084899902344], "page": 16, "span": [0, 755], "__ref_s3_data": null}]}, {"text": "In a world where trust, transparency and explainable AI matters, every organization wants compliance along with the comfort of understanding how analytic insights and decisions are made. The following sections describe some of the principles and organizational requirements for AI governance.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.96249389648438, 123.76123809814453, 543.354248046875, 169.8548126220703], "page": 16, "span": [0, 292], "__ref_s3_data": null}]}, {"text": "Lifecycle governance", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.5681610107422, 710.160400390625, 249.01470947265625, 721.4048461914062], "page": 17, "span": [0, 20], "__ref_s3_data": null}]}, {"text": "Lifecycle governance helps you manage your business information throughout its lifecycle, that is, from creation to deletion. 
IBM AI governance addresses the problems that challenge records managements:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06361389160156, 672.2786865234375, 544.0435791015625, 706.5530395507812], "page": 17, "span": [0, 202], "__ref_s3_data": null}]}, {"text": "GLYPH Monitor, catalog, and govern AI models from anywhere throughout the AI lifecycle.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.78414916992188, 655.0160522460938, 517.3616333007812, 665.3117065429688], "page": 17, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "GLYPH Automate the capture of model metadata for report generation.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66246032714844, 637.6724243164062, 428.482666015625, 647.8308715820312], "page": 17, "span": [0, 77], "__ref_s3_data": null}]}, {"text": "GLYPH Drive transparent and explainable AI at scale.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61790466308594, 621.2044067382812, 352.8333740234375, 631.1478271484375], "page": 17, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "GLYPH Increase accuracy of predictions by identifying how AI is used and where it is lagging.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.47805786132812, 604.2398681640625, 531.1472778320312, 614.4699096679688], "page": 17, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "Risk management", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.8000030517578, 580.1421508789062, 231.87411499023438, 591.301513671875], "page": 17, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "Risk management is used in IBM AI governance to identify, manage, monitor, and report on risk and compliance initiatives at scale:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.37351989746094, 553.9462890625, 544.0723266601562, 
575.8314819335938], "page": 17, "span": [0, 130], "__ref_s3_data": null}]}, {"text": "GLYPH Automate facts and workflow management to comply with business standards.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.86279296875, 537.0994262695312, 497.7820739746094, 547.1629028320312], "page": 17, "span": [0, 89], "__ref_s3_data": null}]}, {"text": "GLYPH Use dynamic dashboards for clear and concise customizable results.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.80929565429688, 520.2988891601562, 455.0130310058594, 530.4574584960938], "page": 17, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "GLYPH Enhanced collaboration across multiple regions and geographies.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.7198486328125, 502.8976135253906, 440.54815673828125, 513.4384155273438], "page": 17, "span": [0, 79], "__ref_s3_data": null}]}, {"text": "Regulatory compliance", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [136.74740600585938, 479.4071350097656, 258.4198303222656, 490.6507568359375], "page": 17, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "Regulatory compliance is a set of rules that organizations must follow to protect sensitive information and ensure human safety. 
Any business that works with digital assets, consumer data, health regulations, employee safety, and private communications is subject to regulatory compliance.$^{3}$ The IBM AI governance solution for IBM Z includes the following tasks:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.20217895507812, 428.6148681640625, 547.2466430664062, 475.2479248046875], "page": 17, "span": [0, 366], "__ref_s3_data": null}]}, {"text": "GLYPH Help adhere to external AI regulations for audit and compliance.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.8566436767578, 412.22723388671875, 433.3389892578125, 422.5816955566406], "page": 17, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "GLYPH Convert external AI regulations into policies for automatic enforcement.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.61163330078125, 395.0362243652344, 465.02978515625, 405.4002990722656], "page": 17, "span": [0, 88], "__ref_s3_data": null}]}, {"text": "GLYPH Use dynamic dashboards for compliance status across policies and regulations.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.56532287597656, 378.1607971191406, 503.326171875, 388.5216064453125], "page": 17, "span": [0, 93], "__ref_s3_data": null}]}, {"text": "Enterprises can develop AI models and deploy them by using IBM Watson Studio or WML on CP4D on Red Hat OpenShift on a virtual machine that is based on IBM z/VM or Red Hat Enterprise Linux KVM on IBM Z. 
AI governance on IBM LinuxONE is supported in the following two ways:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.35816955566406, 319.4354248046875, 547.2515869140625, 365.9399719238281], "page": 17, "span": [0, 271], "__ref_s3_data": null}]}, {"text": "GLYPH Monitor the AI models with Watson OpenScale on CP4D on Red Hat OpenShift on a virtual machine on IBM Z.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.70884704589844, 291.2804870605469, 526.8416137695312, 313.489013671875], "page": 17, "span": [0, 119], "__ref_s3_data": null}]}, {"text": "GLYPH Enterprises can develop AI models by creating and training models by using Watson Studio and development tools such as Jupyter Notebook or JupyterLab, and then deploying the model onto WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z. Then, these enterprises can achieve end-end AI governance by running AI Factsheets, IBM Watson OpenScale, and IBM Watson OpenPagesfi on CP4D on x86.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.62962341308594, 225.8667449951172, 541.8055419921875, 283.9451904296875], "page": 17, "span": [0, 413], "__ref_s3_data": null}]}, {"text": "Figure 9 on page 16 shows the end-to-end flow for a remote AI governance solution.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.40660095214844, 204.2818603515625, 512.4911499023438, 214.43418884277344], "page": 17, "span": [0, 82], "__ref_s3_data": null}]}, {"text": "$^{3 }$https://www.proofpoint.com/us/threat-reference/regulatory-compliance", "type": "footnote", "name": "Footnote", "font": null, "prov": [{"bbox": [136.0848846435547, 56.93043899536133, 418.2659606933594, 66.85415649414062], "page": 17, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "15", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.63853454589844], 
"page": 17, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Figure 9 Remote AI governance solution end-to-end flow", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.46823120117188, 488.9634094238281, 295.7712097167969, 498.658203125], "page": 18, "span": [0, 54], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"text": "To achieve end-to-end AI governance, complete the following steps:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.7501678466797, 466.32562255859375, 438.0164794921875, 476.6277160644531], "page": 18, "span": [0, 66], "__ref_s3_data": null}]}, {"text": "1. Create a model entry in IBM OpenPages by using CP4D on a x86 platform, as shown in Figure 10.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.78916931152344, 437.65087890625, 541.7039184570312, 460.17938232421875], "page": 18, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "Figure 10 Creating a model entry in IBM OpenPages", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.23847198486328, 114.00309753417969, 279.3128356933594, 123.19122314453125], "page": 18, "span": [0, 49], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"text": "16", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.66849517822266, 27.93828010559082, 78.4020004272461, 37.487548828125], "page": 18, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.65258026123047, 267.07440185546875, 37.216922760009766], "page": 18, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "2. 
Train a model by using Watson Studio and by using development tools such as Jupyter Notebook or JupyterLab on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 11.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2081298828125, 686.9100952148438, 542.9114379882812, 721.3231811523438], "page": 19, "span": [0, 192], "__ref_s3_data": null}]}, {"text": "Figure 11 Training an AI model by using Watson Studio", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.40596008300781, 366.4603576660156, 290.68634033203125, 375.5119934082031], "page": 19, "span": [0, 53], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/13"}, {"text": "3. Deploy the model by using WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 12.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.1393585205078, 331.91552734375, 547.2686767578125, 353.865966796875], "page": 19, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "Figure 12 Deploying an AI model by using WML on Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.62543487548828, 56.46229553222656, 351.1957092285156, 65.96401977539062], "page": 19, "span": [0, 66], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"text": "17", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.6064338684082], "page": 19, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "4. Track the external model lifecycle by browsing through the Catalogs/Platform assets catalog by using AI Factsheets and OpenPages while using CP4D on an x86 platform, as shown in Figure 13. 
The external model (deployed on CP4D on Red Hat OpenShift on a virtual machine on IBM Z) is saved as a platform asset catalog on the x86 platform.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.96473693847656, 674.9818725585938, 547.24560546875, 721.3460083007812], "page": 20, "span": [0, 338], "__ref_s3_data": null}]}, {"text": "Figure 13 External model", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.30557250976562, 394.5406799316406, 242.96470642089844, 403.65936279296875], "page": 20, "span": [0, 24], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"text": "You can track the model through each stage of the model lifecycle, as shown in Figure 14, by using AI Factsheets and OpenPages.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.36965942382812, 359.5168151855469, 547.290283203125, 382.07330322265625], "page": 20, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "Figure 14 Tracking the model", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.4922866821289, 72.41610717773438, 187.39756774902344, 81.66903686523438], "page": 20, "span": [0, 28], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"text": "18", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.6649169921875, 27.93828010559082, 78.4020004272461, 37.509376525878906], "page": 20, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.659908294677734, 267.07440185546875, 37.17069625854492], "page": 20, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "You can see that the model facts are tracked and synchronized to IBM OpenPages for risk management, as shown in Figure 15.", "type": "paragraph", "name": "Text", "font": null, "prov": 
[{"bbox": [150.21810913085938, 698.8858642578125, 547.2222290039062, 721.4878540039062], "page": 21, "span": [0, 122], "__ref_s3_data": null}]}, {"text": "Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.2066421508789, 367.82769775390625, 450.17156982421875, 377.36688232421875], "page": 21, "span": [0, 91], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"text": "19", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.720638275146484], "page": 21, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "5. Create an external model by using IBM OpenScale on the x86 platform, as shown in Figure 16.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3836669921875, 699.1000366210938, 525.693115234375, 721.4442749023438], "page": 22, "span": [0, 94], "__ref_s3_data": null}]}, {"text": "Figure 16 Creating an external model on an x86 platform", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.4760971069336, 398.4433898925781, 295.7671203613281, 407.997802734375], "page": 22, "span": [0, 55], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"text": "IBM OpenScale provides a comprehensive dashboard that tracks fairness, quality monitoring, drift, and explainability of a model. Fairness determines whether your model produces biased outcomes. Quality determines how well your model predicts outcomes. Drift is the degradation of predictive performance over time. 
A sample is shown in Figure 17 on page 21.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8937530517578, 339.7777404785156, 547.329345703125, 386.164794921875], "page": 22, "span": [0, 356], "__ref_s3_data": null}]}, {"text": "20", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.96902084350586, 27.93828010559082, 78.4020004272461, 37.76066970825195], "page": 22, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.654016494750977, 267.07440185546875, 37.2003059387207], "page": 22, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 17 IBM OpenScale dashboard that is used to monitor the external model", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.54642486572266, 428.96380615234375, 386.5538024902344, 438.4689636230469], "page": 23, "span": [0, 76], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"text": "You developed and deployed the AI model by using Watson Studio, WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, and end-to-end AI model governance by leveraging AI Factsheets, OpenScale, and OpenPages on CP4D on a x86 platform. 
Figure 18 shows end-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.88458251953125, 358.59173583984375, 547.2167358398438, 416.6777038574219], "page": 23, "span": [0, 340], "__ref_s3_data": null}]}, {"text": "Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.40601348876953, 57.904937744140625, 507.6827087402344, 67.22319793701172], "page": 23, "span": [0, 103], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/20"}, {"text": "21", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4241333007812, 27.93828010559082, 547.2591552734375, 37.61942672729492], "page": 23, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Use case 2: Credit default risk assessment", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 706.0162963867188, 389.3597717285156, 721.7960205078125], "page": 24, "span": [0, 42], "__ref_s3_data": null}]}, {"text": "In today's world, many individuals or businesses seeking loans to meet their growing business needs often look to financial institutions. 
Financial institutions can offer loans to individuals or businesses and charge interest based on the current market situations.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.03933715820312, 655.13671875, 547.2247314453125, 689.2626953125], "page": 24, "span": [0, 265], "__ref_s3_data": null}]}, {"text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.6015853881836, 622.3898315429688, 186.71859741210938, 635.3670654296875], "page": 24, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "Financial institutions must make an accurate decision about whether to sanction a loan or not, and judging the likelihood of default is the difference between a successful and unsuccessful loan portfolio. In a traditional scenario, an experienced banker can judge someone's likelihood of default, but that is not an efficient method for judgment as a business grows.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.15077209472656, 563.016357421875, 547.2955322265625, 609.4694213867188], "page": 24, "span": [0, 366], "__ref_s3_data": null}]}, {"text": "Predictions of credit default risk assessment", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.78274536132812, 530.6572875976562, 341.3221740722656, 543.4954833984375], "page": 24, "span": [0, 45], "__ref_s3_data": null}]}, {"text": "In the modern world, growing business institutions can no longer rely on only experienced bankers to decide whether to sanction a loan knowing that there is a probability that the borrower might default on their loans. 
A better choice is to rely on technological advancements that can help with reasoning based on facts, such as leveraging credit risk modeling techniques to process the historical data of past borrowers to understand their credit behavior and make a more informed decision about whether to lend money, how much money, and decide on the tenure to close the loan.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.8963165283203, 434.8458251953125, 547.2635498046875, 517.2132568359375], "page": 24, "span": [0, 579], "__ref_s3_data": null}]}, {"text": "Financial institutions can leverage AI solutions by using ML techniques to predict the credit risk. Applying AI to credit risk modeling techniques can benefit institutions in decision-making, and thus can help better manage the exposure to credit risk.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.3209991455078, 389.1979064941406, 547.1471557617188, 423.1103210449219], "page": 24, "span": [0, 252], "__ref_s3_data": null}]}, {"text": "Figure 19 on page 23 shows a sample architecture about how to design and develop an AI model for credit risk assessment on IBM Z. 
An IBM WebSpherefi Application Server is used for handling in-bound transactions, and CP4D is used for AI model lifecycle management that includes building, training, and deploying the model.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1620635986328, 331.1011047363281, 547.2008666992188, 377.3158874511719], "page": 24, "span": [0, 321], "__ref_s3_data": null}]}, {"text": "22", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.99784469604492, 27.93828010559082, 78.4020004272461, 37.64631652832031], "page": 24, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.69827651977539, 267.07440185546875, 37.1955451965332], "page": 24, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.55125427246094, 439.1292724609375, 395.7654113769531, 448.4423828125], "page": 25, "span": [0, 82], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/21"}, {"text": "A data scientist can leverage Watson Studio to develop and train an AI model and WML to deploy and score the model. In this sample architecture, the WML Python run time leverages the ML framework, IBM Snap Machine Learning (Snap ML), for scoring, can leverage an integrated AI accelerator at the time of model import.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81259155273438, 380.48779296875, 547.34521484375, 426.5666809082031], "page": 25, "span": [0, 317], "__ref_s3_data": null}]}, {"text": "Then, the banking loan approval team can send a loan applicant request to the IBM WebSphere Application Server, which can make a request to the AI inference endpoint. 
The AI inference engine scores the transaction and sends the result back to the loan approval team. Based on the results, the approval team can decide on whether to approve a loan or not, and also decide how much they can lend, timelines, and other factors.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.68914794921875, 310.3835754394531, 545.5831909179688, 368.8029479980469], "page": 25, "span": [0, 424], "__ref_s3_data": null}]}, {"text": "The transaction system that is shown in Figure 19 uses IBM WebSphere Liberty as an application server, but you also can use an IBM Open Libertyfi application server or any application server that can send RESTful API communications.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.96060180664062, 264.4761962890625, 528.6572875976562, 298.59375], "page": 25, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "Models are frequently developed and tested in many platforms and languages, such as Python, Scala, R, and Go. Models can leverage ML frameworks like scikit-learn, Snap ML, or XGBoost, or DL frameworks like TensorFlow or PyTorch. Training a model can be done on any platform if you have enough computing power for complex models, but moving that model into production requires careful testing to ensure that transactions are not delayed, especially if you plan to run the model within a transaction.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.14891052246094, 182.34982299804688, 547.310546875, 252.74313354492188], "page": 25, "span": [0, 498], "__ref_s3_data": null}]}, {"text": "We showed how IBM Z enable customers to use AI frameworks to detect credit risk. 
Now, we look at how you can leverage CP4D and TensorFlow on IBM Z to detect the credit risk.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84275817871094, 148.16622924804688, 547.2376708984375, 170.75857543945312], "page": 25, "span": [0, 173], "__ref_s3_data": null}]}, {"text": "23", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4790649414062, 27.93828010559082, 547.2591552734375, 37.54842758178711], "page": 25, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "24", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.0537338256836, 27.93828010559082, 78.4020004272461, 37.74729919433594], "page": 26, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.697975158691406, 267.07440185546875, 37.23467254638672], "page": 26, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 20 shows an architecture for predicting credit risk by using DL on IBM Z.", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [136.27874755859375, 710.9722900390625, 489.5701599121094, 721.4159545898438], "page": 26, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "Figure 20 Architecture for credit risk prediction by using DL on IBM Z", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.493896484375, 421.176025390625, 344.1890563964844, 430.731689453125], "page": 26, "span": [0, 70], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/22"}, {"text": "Data scientists can start creating and training a DL AI model by using a Jupyter Notebook instance and Watson Studio. Then, they can deploy the model by using WML on CP4D running on IBM Z, which provides an endpoint. 
Other applications, including the IBM WebSphere server, can produce credit risk results by using the model's endpoint.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09727478027344, 362.354736328125, 534.5686645507812, 408.55877685546875], "page": 26, "span": [0, 335], "__ref_s3_data": null}]}, {"text": "In summary, here are some considerations for developing real-time AI models, such as credit risk assessment:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.12158203125, 328.7194519042969, 547.2158813476562, 350.3298645019531], "page": 26, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "GLYPH A preference for in-platform run times of the model, such as faster execution results.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60023498535156, 311.7396545410156, 522.9054565429688, 322.047607421875], "page": 26, "span": [0, 102], "__ref_s3_data": null}]}, {"text": "GLYPH Less overhead in the end-to-end flows might improve scoring time.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.54747009277344, 294.29937744140625, 445.5369873046875, 304.6208801269531], "page": 26, "span": [0, 81], "__ref_s3_data": null}]}, {"text": "GLYPH If you are using models that are not deployable, CP4D offers a custom Python run time to build your own stack if they are not available on the platform.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.34133911132812, 265.28106689453125, 547.3232421875, 288.05682373046875], "page": 26, "span": [0, 168], "__ref_s3_data": null}]}, {"text": "GLYPH AI inferencing based on ML or DL models can increase the accuracy of better credit risk assessment.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.48162841796875, 236.74082946777344, 541.7804565429688, 258.6591491699219], "page": 26, "span": [0, 115], "__ref_s3_data": null}]}, {"text": "GLYPH Using IBM z16 and 
on-chip AI acceleration with the Telum chip that is embedded with regular Integrated Facility for Linux (IFLs) provides an execution speed for your transactions that cannot be achieved by other means.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.3498992919922, 195.72576904296875, 531.8067626953125, 230.00747680664062], "page": 26, "span": [0, 234], "__ref_s3_data": null}]}, {"text": "Use case 3: Clearing and settlement", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.78247833251953, 705.360595703125, 338.5379638671875, 721.9246826171875], "page": 27, "span": [0, 35], "__ref_s3_data": null}]}, {"text": "Clearing and settlements involve banks or financial institutions sending and receiving wire transfers by using secure interbank payments networks that can clear or settle numerous transactions. When an individual or business entity initiates a wire transfer, clearing begins the fund delivery process. Banks can begin the settlement phase either immediately after clearing takes place or later, mostly at the end of the business day.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9156036376953, 630.8539428710938, 539.5654907226562, 689.4520263671875], "page": 27, "span": [0, 433], "__ref_s3_data": null}]}, {"text": "Industry challenge", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.50138092041016, 598.3314208984375, 179.53228759765625, 611.3511352539062], "page": 27, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "Banks and financial institutions must deal with high-risk transactions that can lead to loss. 
Moreover, these transactions can lead to regulatory violations and extra compliance costs.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.24351501464844, 563.0052490234375, 538.4359130859375, 585.32373046875], "page": 27, "span": [0, 184], "__ref_s3_data": null}]}, {"text": "Clearing and settlement solution", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.65296936035156, 530.1951904296875, 266.077880859375, 543.4844970703125], "page": 27, "span": [0, 32], "__ref_s3_data": null}]}, {"text": "Use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9798126220703, 471.2395324707031, 545.6968383789062, 517.1358032226562], "page": 27, "span": [0, 277], "__ref_s3_data": null}]}, {"text": "In financial institutions, finding which financial transactions are legitimate and which transactions are fraudulent is of paramount importance. In this section, we go through a use case where we use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts to financial institutions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.96380615234375, 389.0691223144531, 544.662109375, 459.2572937011719], "page": 27, "span": [0, 499], "__ref_s3_data": null}]}, {"text": "The goal is to predict in real time whether the transaction being processed might be a fraudulent transaction or not. To achieve this goal, we build an ML model that can do this prediction for the financial institution. 
Because there would be many transactions being processed at any point by the financial institution, it is important to perform this prediction of fraudulent transactions in near-real time in a few milliseconds.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.98394775390625, 319.3011169433594, 543.3472900390625, 377.23699951171875], "page": 27, "span": [0, 430], "__ref_s3_data": null}]}, {"text": "One possible solution is to build and train a TensorFlow based DL model that learns from the historical data and predicts the fraudulent transactions. CP4D on IBM Z and IBM LinuxONE is a suitable product where this task can be achieved and the model deployed, and coming up with a serving endpoint.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0880584716797, 261.0608825683594, 547.2335815429688, 307.2639465332031], "page": 27, "span": [0, 298], "__ref_s3_data": null}]}, {"text": "25", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4534301757812, 27.93828010559082, 547.2591552734375, 37.55341339111328], "page": 27, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "26", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.914608001708984, 27.93828010559082, 78.4020004272461, 37.52546691894531], "page": 28, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.69200897216797, 267.07440185546875, 37.19856643676758], "page": 28, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Figure 21 provides a high-level diagram of a clearing and settlement use case for financial transactions that uses CP4D on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.05018615722656, 699.2785034179688, 537.352294921875, 721.2307739257812], "page": 28, "span": [0, 146], "__ref_s3_data": 
null}]}, {"text": "Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.39627838134766, 467.5994567871094, 459.98809814453125, 477.4582214355469], "page": 28, "span": [0, 97], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/23"}, {"text": "Here are the steps of the high-level process flow:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.23934936523438, 445.1138916015625, 353.37115478515625, 455.2171325683594], "page": 28, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "1. Create a connection to a database (for example, an IBM Db2fi database) where the historical data will be used for ML model building.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.8000030517578, 415.7784729003906, 524.740966796875, 438.1050720214844], "page": 28, "span": [0, 135], "__ref_s3_data": null}]}, {"text": "2. Read the data from the database and prepare the data for AI by using the Data Refinery tool in CP4D.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.04005432128906, 387.2192687988281, 542.9837646484375, 409.4548034667969], "page": 28, "span": [0, 103], "__ref_s3_data": null}]}, {"text": "3. A Jupyter Notebook or JupyterLab IDE that is provided by the Watson Studio component in CP4D helps us build and train the AI model. The trained model can be saved into a WML repository.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2370147705078, 345.5196533203125, 545.7424926757812, 380.059326171875], "page": 28, "span": [0, 188], "__ref_s3_data": null}]}, {"text": "4. 
Deploy the saved model into a deployment space for batch deployment.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.07083129882812, 328.86895751953125, 468.5547790527344, 339.33837890625], "page": 28, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "5. Create a batch deployment by using any of these interfaces:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.43487548828125, 311.5809631347656, 417.2825622558594, 322.1788330078125], "page": 28, "span": [0, 62], "__ref_s3_data": null}]}, {"text": "a. Watson Studio user interface from an Analytics deployment space.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.6441192626953, 294.92474365234375, 460.12939453125, 305.1920471191406], "page": 28, "span": [0, 67], "__ref_s3_data": null}]}, {"text": "b. WML Python client.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.69277954101562, 277.7862243652344, 251.6896209716797, 288.1532287597656], "page": 28, "span": [0, 21], "__ref_s3_data": null}]}, {"text": "c. WML REST APIs.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.7314910888672, 261.2212829589844, 244.95565795898438, 271.8070373535156], "page": 28, "span": [0, 17], "__ref_s3_data": null}]}, {"text": "6. A hardware configuration can be chosen for the deployment.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.27818298339844, 243.74349975585938, 418.506591796875, 253.9542694091797], "page": 28, "span": [0, 61], "__ref_s3_data": null}]}, {"text": "7. 
A batch deployment processes input data from a file, data connection, or connected data in a storage bucket, and writes the output to a selected destination.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3677978515625, 214.9309539794922, 545.685791015625, 237.33485412597656], "page": 28, "span": [0, 160], "__ref_s3_data": null}]}, {"text": "8. One way to run batch deployment to predict or score is to create and run a batch deployment job.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.3165283203125, 185.6129150390625, 510.0397033691406, 207.9795379638672], "page": 28, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "9. Provide an input data type:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.2800750732422, 168.84835815429688, 270.1285705566406, 179.39981079101562], "page": 28, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "a. Inline data for entering a JSON format payload.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.57568359375, 151.5411834716797, 374.55621337890625, 161.9819793701172], "page": 28, "span": [0, 50], "__ref_s3_data": null}]}, {"text": "b. 
Select Data asset , click Select data source , and then specify your asset.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.6676025390625, 135.0714874267578, 492.3292236328125, 145.2565460205078], "page": 28, "span": [0, 78], "__ref_s3_data": null}]}, {"text": "10.The output data type can be a new output file or a connected data asset.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80201721191406, 117.79898834228516, 471.90997314453125, 127.81742095947266], "page": 28, "span": [0, 75], "__ref_s3_data": null}]}, {"text": "11.A Kubernetes admin can change the maximum number of concurrent batch jobs that can be run.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80201721191406, 89.26384735107422, 546.2705688476562, 111.39698028564453], "page": 28, "span": [0, 93], "__ref_s3_data": null}]}, {"text": "12.Get the deployment endpoint URL. For more information, see Getting the deployment endpoint URL.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [136.80201721191406, 59.67621612548828, 531.2200927734375, 81.98482513427734], "page": 28, "span": [0, 98], "__ref_s3_data": null}]}, {"text": "Summary", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.38957977294922, 708.2412719726562, 124.10479736328125, 721.2948608398438], "page": 29, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "With this use case, we attempted to demonstrate how to predict, in real time, whether the transaction that is being processed might be a fraudulent transaction or not. 
By using the method, you have the following advantages:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.63417053222656, 660.6920776367188, 532.9658813476562, 695.446533203125], "page": 29, "span": [0, 223], "__ref_s3_data": null}]}, {"text": "GLYPH No Impact to SLAs and the batch process window.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5712432861328, 643.5092163085938, 374.5103454589844, 654.0914306640625], "page": 29, "span": [0, 63], "__ref_s3_data": null}]}, {"text": "GLYPH Proactively stop losses, and lower operational, regulatory, and compliance costs.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.46438598632812, 626.6151123046875, 508.5062255859375, 637.4598388671875], "page": 29, "span": [0, 97], "__ref_s3_data": null}]}, {"text": "GLYPH The solution is using a DL framework like TensorFlow for high-performing, low latency scoring.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.59495544433594, 598.0394287109375, 533.5338134765625, 620.7293701171875], "page": 29, "span": [0, 110], "__ref_s3_data": null}]}, {"text": "Use case 4: Remaining Useful Life of an aircraft engine", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.5902099609375, 554.939453125, 482.53704833984375, 571.1951293945312], "page": 29, "span": [0, 55], "__ref_s3_data": null}]}, {"text": "In this use case, we describe how an airline can deploy an AI model for inferencing by using IBMfi zSystems.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.06735229492188, 516.2785034179688, 545.7247314453125, 538.8272094726562], "page": 29, "span": [0, 108], "__ref_s3_data": null}]}, {"text": "Remaining Useful Life (RUL) is the remaining time or cycles that an aircraft engine is likely to operate without any failure. 
In this case, it is the equivalent of the number of flights remaining for the engine after the last flight. By estimating RUL, the operator can decide on the next maintenance schedule and avoid unplanned downtime.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.07571411132812, 458.041015625, 547.2705688476562, 504.0214538574219], "page": 29, "span": [0, 339], "__ref_s3_data": null}]}, {"text": "Figure 22 provides an overview of the inferencing architecture for the RUL of an aircraft engine when using IBM Z.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.3302459716797, 424.0997314453125, 525.1622924804688, 446.7457275390625], "page": 29, "span": [0, 114], "__ref_s3_data": null}]}, {"text": "Figure 22 Inferencing architecture on IBM Z", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.4977035522461, 161.0757598876953, 244.7244415283203, 170.71823120117188], "page": 29, "span": [0, 43], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/24"}, {"text": "Because we are looking into data-driven model development, the data set of our target is the run-to-failure data of the engine. We are looking into a supervised learning problem, and we use regression techniques to learn from the data. DL techniques such as Long Short-Term Memory (LSTM) or Gated Recurrent Units (GRU) are our choice because we are looking into a time series data set. TensorFlow or PyTorch frameworks are leveraged to create models. 
AI governance monitors the data and model drift to maintain the model quality throughout the model's life.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.01132202148438, 66.6394271850586, 547.2557373046875, 148.60031127929688], "page": 29, "span": [0, 557], "__ref_s3_data": null}]}, {"text": "27", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.3837280273438, 27.93828010559082, 547.2591552734375, 37.68974685668945], "page": 29, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "28", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.9516716003418, 27.93828010559082, 78.4020004272461, 37.51277542114258], "page": 30, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.685588836669922, 267.07440185546875, 37.18029022216797], "page": 30, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Open-source data from NASA was used to build the AI model, which then was deployed on CP4D. CP4D enables the data-scientist's journey from modeling to deployment in a seamless process. Data engineers leverage Db2 to host the data set, which includes the training, testing, and validation of a data set. Since data is hosted on Db2, you can expect low latency while retrieving the data and serve data security needs because Db2 is hosted on the IBM Z platform. Data is fetched by the data refinery to do the necessary pre-processing and data imputations. You can use the programming languages Golang or C++ for real-time predictions, depending on customer needs. 
For more information about this topic, see \"Use case 3: Clearing and settlement\" on page 25.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9266815185547, 614.9383544921875, 547.2824096679688, 721.2327270507812], "page": 30, "span": [0, 754], "__ref_s3_data": null}]}, {"text": "Model building is done on Watson Studio, leveraging the high-performance computing hardware on IBM Z. You can train the model anywhere (on your own hardware or the cloud) and bring the model directly into CP4D, which provides data scientists with the flexibility of implementation choices.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1148681640625, 557.2208862304688, 545.1770629882812, 603.3070678710938], "page": 30, "span": [0, 289], "__ref_s3_data": null}]}, {"text": "We used LSTM to build the AI model and used the training data. The model was continuously evaluated to model convergence. The final model is tested with the test data, which is never exposed at the time of training to make sure that the model works.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.87142944335938, 510.6993713378906, 547.212890625, 545.31201171875], "page": 30, "span": [0, 249], "__ref_s3_data": null}]}, {"text": "This model is deployed on WML on CP4D and runs on IBM Z. If required, the trained model can be converted to the Open Neural Network Exchange (ONNX) format before deployment. Based on project requirements, IBM Z supports high-throughput, low latency inference requirements by leveraging an AI accelerator.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.99119567871094, 453.0931396484375, 547.3082885742188, 499.47491455078125], "page": 30, "span": [0, 304], "__ref_s3_data": null}]}, {"text": "For decision-making about an aircraft engine's life, it is important to be able to explain the model predictions from end to end. This explainability may be global or local. 
Global explainability enables decision-makers to evaluate the trained model in general from the subject matter expert (SME) point of view. Local explainability enables the operator to validate the reasons behind the present inference and relate it to the past data points, which are an indicative cause of the prediction.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.13357543945312, 371.263671875, 547.282470703125, 441.48968505859375], "page": 30, "span": [0, 495], "__ref_s3_data": null}]}, {"text": "The AI governance components such as IBM OpenScale on CP4D support explainability and manages the drifts in data and concept. OpenPages and AI FactSheet together can alert the stakeholders about important events through a dashboard and allow course correction at any point.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.87094116210938, 313.0604553222656, 547.32421875, 359.3154296875], "page": 30, "span": [0, 273], "__ref_s3_data": null}]}, {"text": "Client-side applications can invoke a REST apiserver that handles some preprocessing of an incoming request before initiating the inference pipeline. 
Efficiencies might be needed in real-time applications, and inference response time can be reduced by adopting low-level programming while components are communicating.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9972686767578, 255.09030151367188, 547.2745361328125, 301.46954345703125], "page": 30, "span": [0, 318], "__ref_s3_data": null}]}, {"text": "Figure 23 on page 29 provides a more in-depth view of the architecture of an AI-based predictive maintenance application.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.4759063720703, 220.89736938476562, 521.204345703125, 243.2855987548828], "page": 30, "span": [0, 121], "__ref_s3_data": null}]}, {"text": "Figure 23 In-depth architectural view", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.46824645996094, 340.3482971191406, 216.07789611816406, 349.8203430175781], "page": 31, "span": [0, 37], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/25"}, {"text": "In summary, consider the following points while developing an AI-based predictive maintenance application:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.14920043945312, 305.67156982421875, 501.76422119140625, 327.83966064453125], "page": 31, "span": [0, 106], "__ref_s3_data": null}]}, {"text": "GLYPH CP4D offers a Python run time to build a custom solution stack, but also supports different components like Watson Studio, WML, Db2, Data Refinery, OpenScale, AI Factsheets, and OpenPages.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.6149139404297, 264.79376220703125, 547.3203125, 299.5812683105469], "page": 31, "span": [0, 204], "__ref_s3_data": null}]}, {"text": "GLYPH The trustworthiness of the predicted output is important for critical use cases.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.60621643066406, 247.86065673828125, 491.34027099609375, 
257.88116455078125], "page": 31, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "GLYPH IBM Z provides high data security and low latency requirements at scale for the critical applications.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.48333740234375, 219.0656280517578, 534.4998779296875, 241.49769592285156], "page": 31, "span": [0, 118], "__ref_s3_data": null}]}, {"text": "GLYPH A data scientist can choose to train the model and deploy it on CP4D seamlessly with the latest tech stack that is available.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4168243408203, 190.18487548828125, 547.2156982421875, 212.0349884033203], "page": 31, "span": [0, 141], "__ref_s3_data": null}]}, {"text": "GLYPH The AIOps and MLOps supported by CP4D to track AI model and data lifecycle throughout the application lifecycle.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57362365722656, 160.84478759765625, 504.0291748046875, 183.60284423828125], "page": 31, "span": [0, 128], "__ref_s3_data": null}]}, {"text": "29", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.4987182617188, 27.93828010559082, 547.2591552734375, 37.63166046142578], "page": 31, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Use case 5: AI-powered video analytics on an infant's motions for health prediction", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.60496520996094, 687.0557861328125, 542.2593994140625, 721.791015625], "page": 32, "span": [0, 83], "__ref_s3_data": null}]}, {"text": "Each year, approximately 5 million newborns worldwide are suffering from a neuro-developmental disorder. Due to the lack of early diagnoses and intervention, many infants are disabled and abandoned, especially in countries with limited numbers of pediatricians with extensive experience in neuro-developmental disorders. 
This situation is a conundrum that plagues many families around the world.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.98846435546875, 612.279052734375, 546.6989135742188, 670.7722778320312], "page": 32, "span": [0, 395], "__ref_s3_data": null}]}, {"text": "Infant motion analysis plays critical importance to understanding and comprehending healthy childhood development. In infants, monitoring their poses provides information about their health that can lead to a better prediction of early developmental risk assessment and diagnosis.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0279998779297, 553.5992431640625, 547.3121948242188, 600.06591796875], "page": 32, "span": [0, 280], "__ref_s3_data": null}]}, {"text": "Adults use different techniques and methods to express their feelings (like sick, happy, stressed, or hungry), but this case is usually different for infants who cannot express their feelings. Based on the baby movements, AI can predict their expression or health.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.17251586914062, 508.10137939453125, 533.3443603515625, 542.4315185546875], "page": 32, "span": [0, 264], "__ref_s3_data": null}]}, {"text": "In this use case, we examine how AI-powered video analytics can assist new parents and hospitals by addressing pose-based real-time body movements of the infants (such as arching back, head banging, kicking legs, rubbing eyes, stretching, and sucking fingers). 
During the initial months of a baby's life, spontaneous movements might indicate later developmental disorders, such as cerebral palsy, Rett syndrome, and autism spectrum disorders.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.91824340820312, 426.2819519042969, 535.1302490234375, 496.59674072265625], "page": 32, "span": [0, 442], "__ref_s3_data": null}]}, {"text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.6669921875, 393.6462707519531, 186.71859741210938, 406.78857421875], "page": 32, "span": [0, 19], "__ref_s3_data": null}]}, {"text": "There are video surveillance systems that are installed for monitoring an infant's movement in many hospitals or homes so that any problem can be witnessed and potentially even stopped before they take place. These systems require much manual work to monitor the real-stream videos and intervene when a problem is detected.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.72630310058594, 334.298828125, 547.2576293945312, 380.78509521484375], "page": 32, "span": [0, 323], "__ref_s3_data": null}]}, {"text": "There is a certain amount of trust that you must place on the person who monitors a surveillance system to ensure that the job is being done effectively and efficiently, and that the surveillance system is being vigilantly watched. 
Because of the dependency on these manual efforts, you need something \"smart\" that monitors constantly the surveillance system and detect problems effectively.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.09779357910156, 264.0821228027344, 547.2775268554688, 322.5864562988281], "page": 32, "span": [0, 391], "__ref_s3_data": null}]}, {"text": "AI is shaping the controls of surveillance that can map and track occurrences with self-learning abilities, AI can improve on human operations and analyze video footage in real time to alert the hospitals or parents if any anomalies are identified.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.74172973632812, 217.7925262451172, 547.2385864257812, 252.07472229003906], "page": 32, "span": [0, 248], "__ref_s3_data": null}]}, {"text": "Video processing a stream of data from surveillance systems and then performing advance analytics and detecting anomalies quickly is a significant challenge in the industry.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9100341796875, 183.46926879882812, 541.7665405273438, 206.01150512695312], "page": 32, "span": [0, 173], "__ref_s3_data": null}]}, {"text": "Infant motion analytics in real time", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.55262756347656, 150.96932983398438, 278.4443054199219, 164.3548583984375], "page": 32, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "AI is the current \"market trend evolution\" in video analytics and advancing the decision-making capabilities of the human mind. DL-based computer vision AI techniques are being widely adopted by various industries to solve real-time problems. These techniques improve the detection and prediction accuracy without increasing the hardware cost exponentially. 
For users, AI greatly reduces the workload of the monitoring staff and provides benefits by detecting unusual incidents and solving many video forensic problems.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83998107910156, 67.68016052246094, 547.2257080078125, 137.958740234375], "page": 32, "span": [0, 519], "__ref_s3_data": null}]}, {"text": "30", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.2401351928711, 27.93828010559082, 78.4020004272461, 37.58808135986328], "page": 32, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.673728942871094, 267.07440185546875, 37.15589141845703], "page": 32, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "S", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.79975891113281, 614.259521484375, 71.44307708740234, 623.4725341796875], "page": 33, "span": [0, 1], "__ref_s3_data": null}]}, {"text": "Figure 24 Architecture for AI-powered video analytics", "type": "caption", "name": "Caption", "font": null, "prov": [{"bbox": [64.60729217529297, 368.9632873535156, 281.4980773925781, 378.5414733886719], "page": 33, "span": [0, 53], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/26"}, {"text": "Live camera feeds or recorded videos of an infant's movement are the inputs for a pose detection model. This video streaming data was stored in IBM Cloudfi Object Storage for image processing. Video data must be transformed into frames so that the infant's body poses can be detected. These post-estimation components of the pipeline predict the location of all 17-person key points with 3 degrees of freedom each (x, y location and visibility) plus two virtual alignment key points. 
This approach also embraces a compute-intensive heat map prediction of infant body posture.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.84861755371094, 274.2659912109375, 547.311279296875, 356.6263732910156], "page": 33, "span": [0, 575], "__ref_s3_data": null}]}, {"text": "When changes in body posture or movement happen, analytics can be performed, and a threshold can be set for the angle of the body and posture movements. An analysis can be performed on movement that is based on that threshold to help to predict an infant's health index in the output video stream by leveraging the IBM z16 on-chip AI acceleration, which provides an execution speed in real time on an edge device, which cannot be achieved by other means.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89413452148438, 192.76055908203125, 539.9171752929688, 262.6158447265625], "page": 33, "span": [0, 454], "__ref_s3_data": null}]}, {"text": "We can leverage the following AI technology stack for this use case:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.61978149414062, 170.2261962890625, 437.95953369140625, 180.65838623046875], "page": 33, "span": [0, 68], "__ref_s3_data": null}]}, {"text": "GLYPH Convolutional neural network: Build an artificial neural network model on video streaming and images.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57650756835938, 141.35748291015625, 546.6869506835938, 163.9613494873047], "page": 33, "span": [0, 117], "__ref_s3_data": null}]}, {"text": "GLYPH TensorFlow: A DL back-end framework that is based on TensorFlow.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.63107299804688, 124.72181701660156, 455.69329833984375, 134.6374969482422], "page": 33, "span": [0, 80], "__ref_s3_data": null}]}, {"text": "GLYPH Mediapipe: A library that helps with video streaming processing and prediction of human pose 
estimation.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57284545898438, 95.74219512939453, 543.4529418945312, 117.91780090332031], "page": 33, "span": [0, 120], "__ref_s3_data": null}]}, {"text": "GLYPH OpenCV: A real-time computer vision library that helps perform image processing.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.501220703125, 78.27716064453125, 516.3308715820312, 88.64454650878906], "page": 33, "span": [0, 96], "__ref_s3_data": null}]}, {"text": "31", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5914916992188, 27.93828010559082, 547.2591552734375, 37.50794219970703], "page": 33, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "CP4D was used to build and deploy the AI-powered video analytics on infant's motion for health prediction use case on IBM Z. IBM Z with AI accelerator enables faster inference for detecting face and body movements and performing angle analytics in real time.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.94410705566406, 686.981201171875, 540.1576538085938, 721.435791015625], "page": 33, "span": [0, 258], "__ref_s3_data": null}]}, {"text": "Figure 24 shows an architectural diagram about how to design and develop an AI model for real-time body pose detection on IBM Z. 
A deep convolutional neural network architecture was trained on the task of infant pose estimation on the custom data set by leveraging IBM Cloud Pak for Data.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0972137451172, 629.25927734375, 542.9444580078125, 675.5055541992188], "page": 33, "span": [0, 288], "__ref_s3_data": null}]}, {"text": "WML was used for deployment of the pose detection model and generated notifications to users with web and mobile applications, and it integrates with Fitbit for push notifications so that hospitals and parents can take preventive actions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.89791870117188, 687.1724243164062, 542.3601684570312, 721.3072509765625], "page": 34, "span": [0, 238], "__ref_s3_data": null}]}, {"text": "Additional resources", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.27169799804688, 644.0363159179688, 223.8605499267578, 659.7733154296875], "page": 34, "span": [0, 20], "__ref_s3_data": null}]}, {"text": "GLYPH The Cloud Pak for Data 4.5 on IBM Z Overview Demo video provides an overview of some of the more important features of CP4D on IBM Z.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.46092224121094, 605.2584838867188, 547.2325439453125, 627.453857421875], "page": 34, "span": [0, 149], "__ref_s3_data": null}]}, {"text": "GLYPH IBM Cloud Pak for Data Tutorials.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.48532104492188, 588.2786865234375, 300.0693054199219, 598.7608642578125], "page": 34, "span": [0, 49], "__ref_s3_data": null}]}, {"text": "GLYPH Here are some additional use cases that use the data science frameworks that are available as part of CP4D on IBM Z and IBM LinuxONE:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.57339477539062, 559.299072265625, 518.558837890625, 581.29833984375], "page": 
34, "span": [0, 149], "__ref_s3_data": null}]}, {"text": "-Payment Card Fraud Detection by using TensorFlow on CP4D on IBM Z and IBM LinuxONE is a payment card fraud detection use case.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.18695068359375, 529.6796875, 527.840576171875, 552.08740234375], "page": 34, "span": [0, 127], "__ref_s3_data": null}]}, {"text": "-Fashion-MNIST clothing classification with PyTorch on Cloud Pak for Data on IBM Z and IBM LinuxONE is a Fashion-MNIST clothing classification use case.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.0952606201172, 501.1863708496094, 539.9617919921875, 523.5965576171875], "page": 34, "span": [0, 152], "__ref_s3_data": null}]}, {"text": "-Payment Card Fraud Prevention by using Snap ML on IBM Cloud Pak for Data on Red Hat OpenShift on a virtual machine on IBM Z and IBM LinuxONE, which leverage the z16 integrated AI accelerator describes a use case that uses Snap Machine Learning in Cloud Pak for Data on IBM Z and IBM LinuxONE. 
It is a Snap ML use case.", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [151.3111572265625, 448.2712097167969, 547.2676391601562, 494.79150390625], "page": 34, "span": [0, 319], "__ref_s3_data": null}]}, {"text": "A companion video can be found at Credit Card Fraud Detection by using Snap ML on IBM Cloud Pak for Data on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [164.91055297851562, 419.2614440917969, 547.1928100585938, 441.53619384765625], "page": 34, "span": [0, 131], "__ref_s3_data": null}]}, {"text": "Summary", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.38446807861328, 375.9455871582031, 137.7028350830078, 391.6693420410156], "page": 34, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "This IBM Redbooksfi publication presented an overview of how IBM Cloud Pak for Data on IBM Z can modernize your data infrastructure; develop and deploy ML and AI models; and instantiate highly efficient analytics deployment on IBM LinuxONE. 
This publication demonstrated these tasks by guiding the reader through five common use cases where CP4D on IBM Z and IBM LinuxONE uses the different features that are supported on the platform, and showing how the associated features can help an enterprise to build AI and ML models with core transactional data, which results in a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.69834899902344, 253.239990234375, 547.1648559570312, 359.5583801269531], "page": 34, "span": [0, 734], "__ref_s3_data": null}]}, {"text": "Authors", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [63.984222412109375, 220.6373748779297, 114.55732727050781, 233.51852416992188], "page": 34, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "32", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.27400970458984, 27.93828010559082, 78.4020004272461, 37.550392150878906], "page": 34, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.730152130126953, 267.07440185546875, 37.189125061035156], "page": 34, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "This publication was produced by a team of specialists from around the world working with the IBM Redbooks team:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.93601989746094, 185.25857543945312, 538.573486328125, 207.30662536621094], "page": 34, "span": [0, 112], "__ref_s3_data": null}]}, {"text": "Jasmeet Bhatia is an AI on IBM Z Product Manager who supports CP4D on IBM Z. She has 2.5 years of combined experience as a data scientist and a product manager. 
Jasmeet lives in San Francisco, California and holds a Bachelor of Arts degree in Data Science. She is working on her Master of Science degree in Data Science. Her area of expertise includes AI, data science, and product management.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.83291625976562, 115.17806243896484, 547.2435913085938, 173.28330993652344], "page": 34, "span": [0, 393], "__ref_s3_data": null}]}, {"text": "Ravi Gummadi is a Technical Leader for CP4D on Linux on IBM Z and IBM LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software for various platforms, including IBM Z and IBM LinuxONE. He holds a master's degree in computer science and engineering from the Indian Institute of Technology Madras (IIT Madras). His areas of expertise include compilers, virtualization, big data analytics, containers, data, and AI, with a special focus on open-source ecosystems.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.073974609375, 651.14697265625, 546.0402221679688, 721.3735961914062], "page": 35, "span": [0, 505], "__ref_s3_data": null}]}, {"text": "Chandra Shekhar Reddy Potula is a Lead AI on zSystems team Architect for Linux on IBM Z and LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software and firmware for various platforms, including IBM Z and LinuxONE. He holds a degree in computer science of engineering from Jawaharlal Nehru Technological University (JNTU). 
His areas of expertise include networking, virtualization, containers, data, and AI, with a special focus on open-source ecosystems.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.9738311767578, 568.9080200195312, 546.8887329101562, 639.3916015625], "page": 35, "span": [0, 501], "__ref_s3_data": null}]}, {"text": "Srirama Sharma is a Lead Technical Architect for IBM Cloud Pak, IBM Instanafi, IBM Turbonomicfi, and Red Hat Advanced Cluster Management for Kubernetes (RHACM) on IBM Z and LinuxONE. He has 18+ years of experience in UNIX and Linux application and device driver development. He designs ISV solutions on IBM Systems and IBM Blockchainfi. He also works on cloud-native adoption of enterprise solutions on IBM Z and LinuxONE. Srirama holds a Bachelor of Engineering degree in computer science from Visvesvaraya Technological University (VTU). He lives in Bangalore, Karnataka. His areas of expertise include UNIX and Linux systems programming, virtualization, performance benchmarking of Financial Services Sector (FSS) industry solutions, open-source ecosystems, server infrastructure, and cloud-native adoption and modernization.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.81300354003906, 439.12628173828125, 547.256103515625, 557.2632446289062], "page": 35, "span": [0, 828], "__ref_s3_data": null}]}, {"text": "Thanks to the following people for their contributions to this project:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.1691436767578, 417.03826904296875, 432.8396301269531, 427.28643798828125], "page": 35, "span": [0, 71], "__ref_s3_data": null}]}, {"text": "Lydia Parziale, Project Manager IBM Redbooks, Poughkeepsie Center", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.17153930664062, 382.91351318359375, 314.4146423339844, 405.3429870605469], "page": 35, "span": [0, 65], "__ref_s3_data": null}]}, {"text": "Shin Kelly Yang, AI on IBM Z Product 
Management IBM US", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.13253784179688, 349.3034973144531, 364.24346923828125, 371.6414489746094], "page": 35, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Tom Ramey, Anna Shugol, Andrew Sica, Jonathan Sloan, Elpida Tzortzatos, Meeta Vouk, IBM", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.0904998779297, 315.28411865234375, 537.7623291015625, 337.33740234375], "page": 35, "span": [0, 87], "__ref_s3_data": null}]}, {"text": "Now you can become a published author, too!", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 282.2770690917969, 349.12164306640625, 295.41973876953125], "page": 35, "span": [0, 43], "__ref_s3_data": null}]}, {"text": "Here's an opportunity to spotlight your skills, grow your career, and become a published author-all at the same time! Join an IBM Redbooks residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. 
Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [135.90232849121094, 187.2394561767578, 547.3480224609375, 269.3630065917969], "page": 35, "span": [0, 559], "__ref_s3_data": null}]}, {"text": "Find out more about the residency program, browse the residency index, and apply online at:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.51300048828125, 164.8619842529297, 547.3431396484375, 175.372802734375], "page": 35, "span": [0, 91], "__ref_s3_data": null}]}, {"text": "ibm.com /redbooks/residencies.html", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [136.80099487304688, 148.4791259765625, 301.6788024902344, 157.64808654785156], "page": 35, "span": [0, 34], "__ref_s3_data": null}]}, {"text": "33", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5746459960938, 27.93828010559082, 547.2591552734375, 37.47697448730469], "page": 35, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Stay connected to IBM Redbooks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.46549987792969, 708.3641967773438, 270.4855651855469, 721.534423828125], "page": 36, "span": [0, 30], "__ref_s3_data": null}]}, {"text": "GLYPH Find us on LinkedIn:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.5939178466797, 685.2982788085938, 241.2664794921875, 695.3732299804688], "page": 36, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "http://www.linkedin.com/groups?home=&gid=2130806", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [150.66708374023438, 667.9544677734375, 391.0767822265625, 677.9544677734375], "page": 36, "span": [0, 48], "__ref_s3_data": null}]}, {"text": "GLYPH Explore new Redbooks publications, residencies, and workshops with the IBM 
Redbooks weekly newsletter:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.4261932373047, 639.2014770507812, 546.8383178710938, 661.0797729492188], "page": 36, "span": [0, 118], "__ref_s3_data": null}]}, {"text": "https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.72427368164062, 622.588134765625, 451.4742736816406, 632.6220703125], "page": 36, "span": [0, 60], "__ref_s3_data": null}]}, {"text": "GLYPH Stay current on recent Redbooks publications with RSS Feeds:", "type": "paragraph", "name": "List-item", "font": null, "prov": [{"bbox": [135.66847229003906, 605.2367553710938, 430.1478271484375, 615.342041015625], "page": 36, "span": [0, 76], "__ref_s3_data": null}]}, {"text": "http://www.redbooks.ibm.com/rss.html", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [150.44317626953125, 588.5687866210938, 331.0777282714844, 598.3783569335938], "page": 36, "span": [0, 36], "__ref_s3_data": null}]}, {"text": "34", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.26653289794922, 27.93828010559082, 78.4020004272461, 37.59064865112305], "page": 36, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 27.662551879882812, 267.07440185546875, 37.169410705566406], "page": 36, "span": [0, 38], "__ref_s3_data": null}]}, {"text": "Notices", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.5048065185547, 718.752197265625], "page": 37, "span": [0, 7], "__ref_s3_data": null}]}, {"text": "This information was developed for products and services offered in the US. This material might be available from IBM in other languages. 
However, you may be required to own a copy of the product or product version in that language in order to access it.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.00486755371094, 629.8663330078125, 547.2454833984375, 659.926025390625], "page": 37, "span": [0, 254], "__ref_s3_data": null}]}, {"text": "IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.13760375976562, 559.7279663085938, 547.1796875, 619.8883056640625], "page": 37, "span": [0, 625], "__ref_s3_data": null}]}, {"text": "IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. 
You can send license inquiries, in writing, to:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.0386962890625, 519.6210327148438, 547.3560180664062, 550.1978149414062], "page": 37, "span": [0, 232], "__ref_s3_data": null}]}, {"text": "IBM Director of Licensing, IBM Corporation, North Castle Drive, MD-NC119, Armonk, NY 10504-1785, US", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.49063873291016, 509.9056701660156, 535.3104248046875, 520.2186889648438], "page": 37, "span": [0, 99], "__ref_s3_data": null}]}, {"text": "INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some jurisdictions do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.12965393066406, 449.7742004394531, 545.7673950195312, 500.42437744140625], "page": 37, "span": [0, 411], "__ref_s3_data": null}]}, {"text": "This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.0031509399414, 400.1820983886719, 547.2484130859375, 439.9192810058594], "page": 37, "span": [0, 345], "__ref_s3_data": null}]}, {"text": "Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. 
The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.96153259277344, 359.86328125, 541.5413208007812, 389.9772033691406], "page": 37, "span": [0, 286], "__ref_s3_data": null}]}, {"text": "IBM may use or distribute any of the information you provide in any way it believes appropriate without incurring any obligation to you.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.42230224609375, 329.4605407714844, 519.2667846679688, 350.0104675292969], "page": 37, "span": [0, 136], "__ref_s3_data": null}]}, {"text": "The performance data and client examples cited are presented for illustrative purposes only. Actual performance results may vary depending on specific configurations and operating conditions.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.80852127075195, 299.43133544921875, 505.2710266113281, 319.78839111328125], "page": 37, "span": [0, 191], "__ref_s3_data": null}]}, {"text": "Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. 
Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.22393798828125, 249.75856018066406, 547.2424926757812, 289.9355163574219], "page": 37, "span": [0, 408], "__ref_s3_data": null}]}, {"text": "Statements regarding IBM's future direction or intent are subject to change or withdrawal without notice, and represent goals and objectives only.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.30084991455078, 219.74908447265625, 544.685791015625, 239.94190979003906], "page": 37, "span": [0, 146], "__ref_s3_data": null}]}, {"text": "This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to actual people or business enterprises is entirely coincidental.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.958980560302734, 170.14593505859375, 547.016357421875, 209.852294921875], "page": 37, "span": [0, 335], "__ref_s3_data": null}]}, {"text": "COPYRIGHT LICENSE:", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.37278747558594, 150.16616821289062, 172.50048828125, 160.1703643798828], "page": 37, "span": [0, 18], "__ref_s3_data": null}]}, {"text": "This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. 
These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs. The sample programs are provided \"AS IS\", without warranty of any kind. IBM shall not be liable for any damages arising out of your use of the sample programs.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.9893798828125, 59.65068817138672, 547.3580932617188, 140.0205841064453], "page": 37, "span": [0, 779], "__ref_s3_data": null}]}, {"text": "' Copyright IBM Corp. 2023.", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [63.854331970214844, 27.89427947998047, 180.32760620117188, 37.42281723022461], "page": 37, "span": [0, 27], "__ref_s3_data": null}]}, {"text": "35", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [535.5529174804688, 27.93828010559082, 547.2591552734375, 37.60062026977539], "page": 37, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "Trademarks", "type": "subtitle-level-1", "name": "Section-header", "font": null, "prov": [{"bbox": [64.16413116455078, 706.0162963867188, 155.489501953125, 721.5309448242188], "page": 38, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation, registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. 
A current list of IBM trademarks is available on the web at \"Copyright and trademark information\" at http://www.ibm.com/legal/copytrade.shtml", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.14505004882812, 649.2987060546875, 547.2343139648438, 689.3553466796875], "page": 38, "span": [0, 383], "__ref_s3_data": null}]}, {"text": "The following terms are trademarks or registered trademarks of International Business Machines Corporation, and might also be trademarks or registered trademarks in other countries.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.75762176513672, 619.232177734375, 547.241455078125, 639.3633422851562], "page": 38, "span": [0, 181], "__ref_s3_data": null}]}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"text": "The following terms are trademarks of other companies:", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.11603546142578, 523.834716796875, 312.0052490234375, 533.9044189453125], "page": 38, "span": [0, 54], "__ref_s3_data": null}]}, {"text": "Intel, Intel logo, Intel Inside logo, and Intel Centrino logo are trademarks or registered trademarks of Intel Corporation or its subsidiaries in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.29424285888672, 493.69439697265625, 528.6849365234375, 514.6287841796875], "page": 38, "span": [0, 184], "__ref_s3_data": null}]}, {"text": "The registered trademark Linuxfi is used pursuant to a sublicense from the Linux Foundation, the exclusive licensee of Linus Torvalds, owner of the mark on a worldwide basis.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [63.84184265136719, 464.0928955078125, 541.6887817382812, 484.438720703125], "page": 38, "span": [0, 174], "__ref_s3_data": null}]}, {"text": "Red Hat and OpenShift are trademarks or registered trademarks of Red Hat, Inc. 
or its subsidiaries in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.25360870361328, 434.2598876953125, 531.9806518554688, 454.5678405761719], "page": 38, "span": [0, 140], "__ref_s3_data": null}]}, {"text": "UNIX is a registered trademark of The Open Group in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.4344482421875, 414.2238464355469, 472.0943908691406, 424.56378173828125], "page": 38, "span": [0, 90], "__ref_s3_data": null}]}, {"text": "Other company, product, or service names may be trademarks or service marks of others.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [64.17461395263672, 394.30035400390625, 465.3721618652344, 404.4769287109375], "page": 38, "span": [0, 86], "__ref_s3_data": null}]}, {"text": "36", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [64.34241485595703, 26.91827964782715, 78.4020004272461, 36.332603454589844], "page": 38, "span": [0, 2], "__ref_s3_data": null}]}, {"text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null, "prov": [{"bbox": [93.42030334472656, 26.466215133666992, 267.07440185546875, 36.090301513671875], "page": 38, "span": [0, 38], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/27"}, {"text": "Back cover", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [287.2200012207031, 741.251953125, 415.20721435546875, 763.4519653320312], "page": 40, "span": [0, 10], "__ref_s3_data": null}]}, {"text": "REDP-5695-00", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [496.1397399902344, 670.4779663085938, 564.1908569335938, 680.523193359375], "page": 40, "span": [0, 12], "__ref_s3_data": null}]}, {"text": "ISBN 0738461067", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [482.7099609375, 
649.478271484375, 564.5999145507812, 659.0663452148438], "page": 40, "span": [0, 15], "__ref_s3_data": null}]}, {"text": "Printed in U.S.A.", "type": "paragraph", "name": "Text", "font": null, "prov": [{"bbox": [497.399169921875, 89.81710052490234, 564.1929321289062, 99.43573760986328], "page": 40, "span": [0, 17], "__ref_s3_data": null}]}, {"name": "Picture", "type": "figure", "$ref": "#/figures/28"}], "figures": [{"bounding-box": null, "prov": [{"bbox": [409.23211669921875, 713.666259765625, 568.517822265625, 757.0501708984375], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [0.12108886241912842, 90.19225311279297, 610.4767456054688, 504.2667541503906], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [33.33281326293945, 552.1343383789062, 238.89004516601562, 721.9103393554688], "page": 3, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [135.98939514160156, 345.3135986328125, 436.0746154785156, 714.1400756835938], "page": 5, "span": [0, 16], "__ref_s3_data": null}], "text": "Figure 1 IBM z16", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.93318176269531, 415.7976989746094, 547.8917236328125, 685.1072998046875], "page": 6, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 2 IBM Z: Processor roadmap", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [434.496826171875, 442.9707946777344, 543.5748901367188, 660.1309204101562], "page": 7, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3 System design of IBM z16 LinuxONE Emperor 4", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [65.18315887451172, 430.1353454589844, 547.7926635742188, 684.5646362304688], "page": 8, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores", 
"type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.25979614257812, 492.203369140625, 548.035888671875, 714.2471923828125], "page": 9, "span": [0, 29], "__ref_s3_data": null}], "text": "Figure 5 Seamless integration", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.54902648925781, 310.984375, 547.7777709960938, 612.0947265625], "page": 10, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6 Solution overview of Cloud Pak for Data", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.61245346069336, 441.5110168457031, 547.817138671875, 714.1513061523438], "page": 13, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.72257995605469, 462.15850830078125, 547.6842041015625, 660.9888916015625], "page": 16, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 8 Typical AI model lifecycle", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.82352828979492, 501.3348693847656, 548.0906982421875, 714.08984375], "page": 18, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 9 Remote AI governance solution end-to-end flow", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.964263916015625, 125.87242126464844, 547.62890625, 423.3223571777344], "page": 18, "span": [0, 49], "__ref_s3_data": null}], "text": "Figure 10 Creating a model entry in IBM OpenPages", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.76428985595703, 378.5587158203125, 547.38916015625, 672.571533203125], "page": 19, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 11 Training an AI model by using Watson Studio", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.11590576171875, 67.91944122314453, 547.7499389648438, 318.0794982910156], "page": 19, "span": [0, 66], "__ref_s3_data": null}], "text": "Figure 12 Deploying 
an AI model by using WML on Cloud Pak for Data", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [136.353515625, 406.53900146484375, 533.409912109375, 661.0449829101562], "page": 20, "span": [0, 24], "__ref_s3_data": null}], "text": "Figure 13 External model", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.07536315917969, 83.89727020263672, 547.8724975585938, 346.0033874511719], "page": 20, "span": [0, 28], "__ref_s3_data": null}], "text": "Figure 14 Tracking the model", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.772430419921875, 380.2639465332031, 547.672119140625, 684.7722778320312], "page": 21, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.67052459716797, 410.38824462890625, 547.7557983398438, 684.6401977539062], "page": 22, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 16 Creating an external model on an x86 platform", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.84904861450195, 440.8315734863281, 547.9327392578125, 713.8433837890625], "page": 23, "span": [0, 76], "__ref_s3_data": null}], "text": "Figure 17 IBM OpenScale dashboard that is used to monitor the external model", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.19013214111328, 69.50128936767578, 547.7117919921875, 344.5738220214844], "page": 23, "span": [0, 103], "__ref_s3_data": null}], "text": "Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.70671081542969, 451.1697692871094, 545.1655883789062, 713.8380737304688], "page": 25, "span": [0, 82], "__ref_s3_data": null}], "text": "Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": 
[64.45086669921875, 433.2137756347656, 547.7979736328125, 696.4598388671875], "page": 26, "span": [0, 70], "__ref_s3_data": null}], "text": "Figure 20 Architecture for credit risk prediction by using DL on IBM Z", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.91328811645508, 479.8487548828125, 542.5390625, 684.5452270507812], "page": 28, "span": [0, 97], "__ref_s3_data": null}], "text": "Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.87143325805664, 173.84889221191406, 539.6433715820312, 410.2047119140625], "page": 29, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 22 Inferencing architecture on IBM Z", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [63.89833068847656, 352.471923828125, 547.9695434570312, 714.0540771484375], "page": 31, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 23 In-depth architectural view", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [64.11891174316406, 381.7127380371094, 541.8325805664062, 614.5540161132812], "page": 33, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 24 Architecture for AI-powered video analytics", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [42.87873458862305, 15.771553993225098, 69.14901733398438, 43.16151428222656], "page": 40, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}, {"bounding-box": null, "prov": [{"bbox": [433.9858703613281, 19.161434173583984, 572.2542114257812, 54.680973052978516], "page": 40, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure"}], "tables": [{"bounding-box": null, "prov": [{"bbox": [75.00436401367188, 546.11572265625, 487.3641357421875, 611.8494262695312], "page": 38, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 3, "#-rows": 6, "data": [[{"bbox": [75.5999984741211, 601.1370239257812, 98.19540405273438, 
609.4619750976562], "spans": [[0, 0]], "text": "Db2fi IBMfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [236.40029907226562, 601.13623046875, 292.0049743652344, 609.461181640625], "spans": [[0, 1]], "text": "IBM Watsonfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [397.2005920410156, 601.1354370117188, 455.17950439453125, 609.4603881835938], "spans": [[0, 2]], "text": "Redbooks (log o) fi Turbon", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [236.40029907226562, 590.15625, 278.3294982910156, 598.481201171875], "spans": [[1, 1]], "text": "IBM z16\u2122", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [425.6900939941406, 590.1554565429688, 451.2635803222656, 598.4804077148438], "spans": [[1, 2]], "text": "omicfi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [75.5999984741211, 579.1167602539062, 144.1304931640625, 587.4417114257812], "spans": [[2, 0]], "text": "IBM Blockchainfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [236.40029907226562, 579.115966796875, 272.4696044921875, 587.44091796875], "spans": [[2, 1]], "text": "Instanafi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [397.2005920410156, 579.1151733398438, 451.2527770996094, 587.4401245117188], "spans": [[2, 2]], "text": "WebSpherefi", "type": "body", "col": 2, 
"col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [75.5999984741211, 568.1367797851562, 112.57559204101562, 576.4617309570312], "spans": [[3, 0]], "text": "IBM Cloudfi IBM Clou", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [236.40029907226562, 568.135986328125, 294.3809509277344, 576.4609375], "spans": [[3, 1]], "text": "Open Libertyfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [397.2005920410156, 568.1351928710938, 423.8045959472656, 576.4601440429688], "spans": [[3, 2]], "text": "z/OSfi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [112.60798645019531, 557.1567993164062, 142.21170043945312, 565.4817504882812], "spans": [[4, 0]], "text": "d Pakfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [236.40029907226562, 557.156005859375, 290.44708251953125, 565.48095703125], "spans": [[4, 1]], "text": "OpenPagesfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [397.2005920410156, 557.1552124023438, 420.6365966796875, 565.4801635742188], "spans": [[4, 2]], "text": "z16\u2122", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [75.5999984741211, 546.1165161132812, 128.0511016845703, 554.4414672851562], "spans": [[5, 0]], "text": "IBM Telum\u2122", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [236.40029907226562, 546.11572265625, 283.47210693359375, 554.440673828125], "spans": [[5, 1]], "text": "Redbooksfi", "type": "body", "col": 1, 
"col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}]], "model": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}, {"height": 792.0, "page": 17, "width": 612.0}, {"height": 792.0, "page": 18, "width": 612.0}, {"height": 792.0, "page": 19, "width": 612.0}, {"height": 792.0, "page": 20, "width": 612.0}, {"height": 792.0, "page": 21, "width": 612.0}, {"height": 792.0, "page": 22, "width": 612.0}, {"height": 792.0, "page": 23, "width": 612.0}, {"height": 792.0, "page": 24, "width": 612.0}, {"height": 792.0, "page": 25, "width": 612.0}, {"height": 792.0, "page": 26, "width": 612.0}, {"height": 792.0, "page": 27, "width": 612.0}, {"height": 792.0, "page": 28, "width": 612.0}, {"height": 792.0, "page": 29, "width": 612.0}, {"height": 792.0, "page": 30, "width": 612.0}, {"height": 792.0, "page": 31, "width": 612.0}, {"height": 792.0, "page": 32, "width": 612.0}, {"height": 792.0, "page": 33, "width": 612.0}, {"height": 792.0, "page": 34, "width": 612.0}, {"height": 792.0, "page": 35, "width": 612.0}, {"height": 
792.0, "page": 36, "width": 612.0}, {"height": 792.0, "page": 37, "width": 612.0}, {"height": 792.0, "page": 38, "width": 612.0}, {"height": 792.0, "page": 39, "width": 612.0}, {"height": 792.0, "page": 40, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "redp5695.pdf", "filename-prov": null, "document-hash": "a03aa4721c6532a8bab8a84cd8fdf579b5d91b92e5e5bbf63552b451c1b1ad7e", "#-pages": 40, "collection-name": null, "description": null, "page-hashes": [{"hash": "2c6aa6caf31aededa105d495c308dfbbb82f36e74a5c918aef77cb55e270512c", "model": "default", "page": 1}, {"hash": "4c44677e63816427d586e3351fca1f60791742ff03d4e4fbd9e20049f0df1e7c", "model": "default", "page": 2}, {"hash": "9b14b4b39f406833be1be7b8a4267fbc0c9e3fb38d9c0d6be9233aaa7b34a290", "model": "default", "page": 3}, {"hash": "b393eac931d491dfb5754007d81c5eb852837b767e6bbf97532fdd9797089c84", "model": "default", "page": 4}, {"hash": "069d8724992bd8bcf023444a735733831db317258922be9bf9e68b6cb71592b3", "model": "default", "page": 5}, {"hash": "50142dc112c6479b6aa9b337444bf76fab1788845c852372d680663fac7bb708", "model": "default", "page": 6}, {"hash": "dd036a3882859d0dca411354baf6f8d89fecaa39938e0336366cdb0e70aea878", "model": "default", "page": 7}, {"hash": "b6ff3d96c10d8bb147a1678f23b1b3dc0a57314ac67de6dc51c7fed42235b8fb", "model": "default", "page": 8}, {"hash": "a9f5f13f5b38d6e5f144870e072b9b5dcda3e9efb2612d12f2f78844a2af5faf", "model": "default", 
"page": 9}, {"hash": "2ee65c9e3ecaf2ee28d154184562131c2c0d46fde9536daf2bf63f27cd1593fb", "model": "default", "page": 10}, {"hash": "7c3211d92edf78fb8fc2a25419e905c54df6cf08e46a6ba3af97c6ecb6f42976", "model": "default", "page": 11}, {"hash": "e9d2d2c16961c78c4252759f518e49d67e42323770ea954f0fdb5845060255b4", "model": "default", "page": 12}, {"hash": "96ec46a4b8a06f1e4a0b8ab372f0fa2eb9fa3651d598777108e2c8d841504c01", "model": "default", "page": 13}, {"hash": "85666d6d7492d1aa5e2d680aad82b243f74ac9a88d12a822f1e23a4015653280", "model": "default", "page": 14}, {"hash": "996b6243f71e4b579c257f5a8c13120d756c35db892bb3978a030b3c0244b332", "model": "default", "page": 15}, {"hash": "8d3962d8d62baba81d7c1f9136148614f19ce3e67856165e11746a4eea0ba0e2", "model": "default", "page": 16}, {"hash": "c97edc5fbc0c2ebe67d17aaa3a35994c946e63370e43599ff1a53a43cb015f4b", "model": "default", "page": 17}, {"hash": "027b2018dd204fdcfde1f23714efb239c7b0ddec82e6dcb261917047112c59c1", "model": "default", "page": 18}, {"hash": "bfd48e4d4c91d25d1ce8b96cb8b893121083e34366f2336ec2297ad429aefba5", "model": "default", "page": 19}, {"hash": "369aa7281999b4927d03fc80d04f64c4c466672e988a345b8e38b0b413d8b061", "model": "default", "page": 20}, {"hash": "07156cdcdb8bed82cd7def82ae50e3696797733c1af054e52e085d6dc4f158f3", "model": "default", "page": 21}, {"hash": "6868b382cf5f556a700cbfbd3b5019d1874f4d0b539322cf2d66d3ae91750021", "model": "default", "page": 22}, {"hash": "d18c7c0751a2b21cd90b28db97f792b76d800ef27df66b3b7551f30ae8f3c135", "model": "default", "page": 23}, {"hash": "f6bafed831071a1e8cc789bd6dc193c05982a2d11675458e0e470c69e09c39eb", "model": "default", "page": 24}, {"hash": "acd7cf74cdc0fb0f0fa69c5f5d5882fae44ead0f3e98317d0f1ca28a8bbfa0e5", "model": "default", "page": 25}, {"hash": "8618c32aa8f279cc7cfa3df0ffd5eb3f3f54d7216ae3c1af792a4cb778067f0e", "model": "default", "page": 26}, {"hash": "a158cfc6005ac6ec5857112db18ccbab469a42558439bf7c3e2ff5d63894cab4", "model": "default", "page": 27}, 
{"hash": "d93efe59e4c22d9f511c0df70a9ec01f5a030934af7b9e6f5232db806b143152", "model": "default", "page": 28}, {"hash": "27d5b0915a207c64ea3dfb11866a19a1ef42e54840493113126c243c4be9bd89", "model": "default", "page": 29}, {"hash": "dfd1705fa6a5b13fef569bb8a9dd567ed3950bb91004431c85bd6499135e0d98", "model": "default", "page": 30}, {"hash": "74a78ed4bb5999301ec12e980789a33cabea9131a0a8f03899a49655b7196ea4", "model": "default", "page": 31}, {"hash": "f38e9710dee1ada6c040bdf1d6b33373780790d53391dde1bdf13ba3a24237cc", "model": "default", "page": 32}, {"hash": "4c45d9a942e9431722d4e657e1772e30ed322e4c48aa8fa8e0f582cab686694f", "model": "default", "page": 33}, {"hash": "d5d0bd616da3d60f4162c3bf0c136e6fb6a3776a09daac01676eaf4f194ddbc8", "model": "default", "page": 34}, {"hash": "e68ecc6baff9b98afcefa647a55aa34f9a9fe9f507a393177097d89c5faf1901", "model": "default", "page": 35}, {"hash": "806561bd028d61b13a1d840ef5aa6bc29f596670f6a755cdff7f5ca16156bc6c", "model": "default", "page": 36}, {"hash": "d3b094a0a238bbd0866053a416a69e04d4d8efb937be01a022db3ccb24808e29", "model": "default", "page": 37}, {"hash": "af95f582613321de443a381441d8c361f4ff94fc0f777736e939927cc7d9963f", "model": "default", "page": 38}, {"hash": "28b15af143c0cf1810e8552ebaf1c58b1597cd4bb772e981b5979d0e83c98d0e", "model": "default", "page": 39}, {"hash": "b98218d5619db175ad1a9a2424365094f52356cbefadce85e557ebb182151d82", "model": "default", "page": 40}]}, "main-text": [{"prov": [{"bbox": [285.9599914550781, 760.5719604492188, 417.8999938964844, 782.77197265625], "page": 1, "span": [0, 11], "__ref_s3_data": null}], "text": "Front cover", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/0"}, {"prov": [{"bbox": [44.81999969482422, 595.87158203125, 535.7647094726562, 683.3976440429688], "page": 1, "span": [0, 31], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"name": 
"Picture", "type": "figure", "$ref": "#/figures/1"}, {"prov": [{"bbox": [467.2799987792969, 23.920970916748047, 571.5428466796875, 50.99158477783203], "page": 1, "span": [0, 8], "__ref_s3_data": null}], "text": "Redguide", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/2"}, {"prov": [{"bbox": [64.80000305175781, 511.6919860839844, 292.852783203125, 534.8515014648438], "page": 3, "span": [0, 18], "__ref_s3_data": null}], "text": "Executive overview", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.93536376953125, 393.2442932128906, 547.2804565429688, 476.23895263671875], "page": 3, "span": [0, 573], "__ref_s3_data": null}], "text": "Most industries are susceptible to fraud, which poses a risk to both businesses and consumers. According to The National Health Care Anti-Fraud Association, health care fraud alone causes the nation around $68 billion annually.$^{1}$ This statistic does not include the numerous other industries where fraudulent activities occur daily. In addition, the growing amount of data that enterprises own makes it difficult for them to detect fraud. Businesses can benefit by using an analytical platform to fully integrate their data with artificial intelligence (AI) technology.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.92813110351562, 323.9799499511719, 547.349853515625, 382.1717224121094], "page": 3, "span": [0, 392], "__ref_s3_data": null}], "text": "With IBM Cloud Pakfi for Data on IBM Z, enterprises can modernize their data infrastructure, develop, and deploy machine learning (ML) and AI models, and instantiate highly efficient analytics deployment on IBM LinuxONE. 
Enterprises can create cutting-edge, intelligent, and interactive applications with embedded AI, colocate data with commercial applications, and use AI to make inferences.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.94898986816406, 253.30953979492188, 547.2882690429688, 312.1575622558594], "page": 3, "span": [0, 385], "__ref_s3_data": null}], "text": "This IBM Redguide publication presents a high-level overview of IBM Z. It describes IBM Cloud Pak for Data (CP4D) on IBM Z and IBM LinuxONE, the different features that are supported on the platform, and how the associated features can help enterprise customers in building AI and ML models by using core transactional data, which results in decreased latency and increased throughput.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8448028564453, 171.74127197265625, 547.2760009765625, 242.12237548828125], "page": 3, "span": [0, 503], "__ref_s3_data": null}], "text": "This publication highlights real-time CP4D on IBM Z use cases. Real-time Clearing and Settlement Transactions, Trustworthy AI and its Role in Day-To-Day Monitoring, and the Prevention of Retail Crimes are use cases that are described in this publication. 
Using CP4D on IBM Z and LinuxONE, this publication shows how businesses can implement a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 56.842594146728516, 387.7856140136719, 66.6188735961914], "page": 3, "span": [0, 68], "__ref_s3_data": null}], "text": "$^{1 }$https://www.bcbsm.com/health-care-fraud/fraud-statistics.html", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [63.84707260131836, 27.84148406982422, 180.32760620117188, 37.38951110839844], "page": 3, "span": [0, 27], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2023.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [541.6041259765625, 27.93828010559082, 547.2176513671875, 37.469120025634766], "page": 3, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.75005340576172, 706.0162963867188, 212.3214874267578, 721.6939086914062], "page": 4, "span": [0, 18], "__ref_s3_data": null}], "text": "IBM Z: An overview", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.85592651367188, 630.8654174804688, 539.5514526367188, 689.4552612304688], "page": 4, "span": [0, 365], "__ref_s3_data": null}], "text": "Ever wonder how many transactions a bank processes per day? What about the pace at which these transactions happen? 
According to an IBMfi report, 44 of 50 of the world's top banks use IBM Z mainframes for these daily transactions.$^{2}$ IBM Z is a platform that is designed for voluminous data, maximum security, real-time transaction analysis, and cost efficiency.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8168182373047, 597.2792358398438, 515.6898803710938, 619.3519287109375], "page": 4, "span": [0, 92], "__ref_s3_data": null}], "text": "The most recent platform for IBM Z is IBM z16\u2122. The IBM z16 supports the following features:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.85256958007812, 580.2994384765625, 255.07154846191406, 590.5203857421875], "page": 4, "span": [0, 39], "__ref_s3_data": null}], "text": "GLYPH On-chip AI acceleration", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.62339782714844, 562.92626953125, 289.7875671386719, 573.06884765625], "page": 4, "span": [0, 45], "__ref_s3_data": null}], "text": "GLYPH Quantum-safe crypto discovery", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.72850036621094, 546.2800903320312, 247.85403442382812, 556.7696533203125], "page": 4, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH Simplified compliance", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6919708251953, 528.5509643554688, 225.09173583984375, 539.1024169921875], "page": 4, "span": [0, 33], "__ref_s3_data": null}], "text": "GLYPH Flexible capacity", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.60623168945312, 511.6312561035156, 280.60699462890625, 521.9880981445312], "page": 4, "span": [0, 45], "__ref_s3_data": null}], "text": "GLYPH Modernization of applications", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6087646484375, 494.7903137207031, 210.2745361328125, 505.39013671875], "page": 4, "span": [0, 30], 
"__ref_s3_data": null}], "text": "GLYPH Sustainability", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.90225219726562, 461.2615661621094, 521.9436645507812, 483.12371826171875], "page": 4, "span": [0, 101], "__ref_s3_data": null}], "text": "With these features, enterprises can upgrade applications while preserving secure and resilient data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9076690673828, 439.1145324707031, 434.5896301269531, 449.38116455078125], "page": 4, "span": [0, 65], "__ref_s3_data": null}], "text": "To learn more about these features, see the IBM z16 product page.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.52610778808594, 417.182861328125, 415.693603515625, 427.33294677734375], "page": 4, "span": [0, 60], "__ref_s3_data": null}], "text": "Figure 1 on page 3 shows a picture of the IBM z16 mainframe.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1376495361328, 57.05204391479492, 311.9372253417969, 67.31904602050781], "page": 4, "span": [0, 49], "__ref_s3_data": null}], "text": "$^{2 }$https://www.ibm.com/case-studies/bankwest/", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [64.08910369873047, 27.93828010559082, 72.8219985961914, 37.42863464355469], "page": 4, "span": [0, 1], "__ref_s3_data": null}], "text": "2", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.64891815185547, 27.621387481689453, 261.53851318359375, 37.20967102050781], "page": 4, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.36338806152344, 333.7419738769531, 211.10719299316406, 343.4512634277344], "page": 5, "span": [0, 16], "__ref_s3_data": null}], "text": "Figure 1 IBM z16", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": 
"figure", "$ref": "#/figures/3"}, {"prov": [{"bbox": [64.56404876708984, 301.7572937011719, 355.6016540527344, 314.76092529296875], "page": 5, "span": [0, 43], "__ref_s3_data": null}], "text": "IBM z16 and IBM LinuxONE Emperor 4 features", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.82955932617188, 230.37913513183594, 547.1771240234375, 288.7164306640625], "page": 5, "span": [0, 394], "__ref_s3_data": null}], "text": "IBM Z are based on enterprise mainframe technology. Starting with transaction-based workloads and databases, IBM Z has undergone tremendous transformations in its system design for many generations to build servers that cater to Linux-based workloads and security with a cyberresilient system, and support quantum computing and modernization by using a hybrid cloud with a focus on data and AI.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [541.1922607421875, 27.93828010559082, 547.2176513671875, 37.54865264892578], "page": 5, "span": [0, 1], "__ref_s3_data": null}], "text": "3", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.48660278320312, 27.93828010559082, 72.8219985961914, 37.547672271728516], "page": 6, "span": [0, 1], "__ref_s3_data": null}], "text": "4", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.67277526855469, 27.705074310302734, 261.53851318359375, 37.23814392089844], "page": 6, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.30259704589844, 699.2496337890625, 543.5195922851562, 721.4502563476562], "page": 6, "span": [0, 121], "__ref_s3_data": null}], "text": "Figure 2 provides a snapshot of the IBM Z processor roadmap, which depicts the journey of transformation and improvement.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": 
[64.45831298828125, 403.8142395019531, 213.13937377929688, 413.22802734375], "page": 6, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 2 IBM Z: Processor roadmap", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/4"}, {"prov": [{"bbox": [135.9113311767578, 297.0347900390625, 547.256591796875, 391.550048828125], "page": 6, "span": [0, 689], "__ref_s3_data": null}], "text": "The IBM z16 and IBM LinuxONE Emperor 4 are the latest of the IBM Z, and they are developed with a 'built to build' focus to provide a powerful, cyberresilient, open, and secure platform for business with an extra focus on sustainability to help build sustainable data centers. Although the z16 server can host both IBM z/OSfi and Linux workloads, LinuxONE Emperor 4 is built to host Linux only workloads with a focus on consolidation and resiliency. Depending on the workload, consolidation from numerous x86 servers into a LinuxONE Emperor 4 can help reduce energy consumption by 75% and data center floor space by 50%, which helps to achieve the sustainability goals of the organization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.12657165527344, 226.86495971679688, 547.257568359375, 285.4850769042969], "page": 6, "span": [0, 432], "__ref_s3_data": null}], "text": "Figure 3 on page 5 shows a summary of the system design of IBM LinuxONE Emperor 4 with the IBM Telum\u2122 processor. The IBM Telum processor chip is designed to run enterprise applications efficiently where their data resides to embed AI with super low latency. The support for higher bandwidth and I/O rates is supported through FCP Express cards with an endpoint security solution. 
The memory subsystem supports up to 40 TB of memory.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.32421112060547, 430.4823913574219, 297.8570861816406, 439.8866882324219], "page": 7, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3 System design of IBM z16 LinuxONE Emperor 4", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/5"}, {"prov": [{"bbox": [135.7873992919922, 359.77313232421875, 547.2974243164062, 417.9614562988281], "page": 7, "span": [0, 417], "__ref_s3_data": null}], "text": "The IBM z16 and IBM LinuxONE Emperor 4 servers are built with 7-nm technology at a 5.2 GHz speed. They consist of four dual-chip modules (DCMs) per central processor complex (CPC) drawer, each of which is built with two 8-core Telum processor chips that has \"first in the industry\" on-chip acceleration for mid-transaction, real-time AI inferencing, which supports many different use cases, including fraud detection.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89752197265625, 277.57452392578125, 547.322265625, 348.19305419921875], "page": 7, "span": [0, 452], "__ref_s3_data": null}], "text": "Each core has access to a huge private 32 MB L2 cache where up to 16 MB of the L2 cache of an inactive core can be used as virtual cache (L3 / L4) by neighboring active cores on the chip. This cache helps address translation and access checking by prefetching the same virtual cache into the L2 cache. 
The virtual cache also includes Neural Network Processing Assist instructions and direct memory access with protection, and per chip GZIP compression.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [541.3024291992188, 27.93828010559082, 547.2176513671875, 37.632568359375], "page": 7, "span": [0, 1], "__ref_s3_data": null}], "text": "5", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.51323699951172, 27.93828010559082, 72.8219985961914, 37.4283332824707], "page": 8, "span": [0, 1], "__ref_s3_data": null}], "text": "6", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.71185302734375, 27.736722946166992, 261.53851318359375, 37.20823669433594], "page": 8, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.292236328125, 698.827880859375, 541.310546875, 721.4586791992188], "page": 8, "span": [0, 115], "__ref_s3_data": null}], "text": "Figure 4 provides more information about the features of AI Accelerator integration with the IBM Z processor cores.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.19121551513672, 418.4478454589844, 387.3546142578125, 428.0519714355469], "page": 8, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/6"}, {"prov": [{"bbox": [135.91659545898438, 323.6279296875, 547.2345581054688, 406.1714172363281], "page": 8, "span": [0, 600], "__ref_s3_data": null}], "text": "The IBM z16 and IBM LinuxONE Emperor 4 server platforms are built with the hardware features that are shown in Figure 4 with addressing data and AI workloads in mind. 
Regardless of where the ML and deep learning (DL) frameworks are used to build and train data and AI models, the inferencing on existing enterprise application data can happen along currently running enterprise business applications. CP4D 4.6 supports Tensorflow and IBM Snap ML frameworks, which are optimized to use the on-chip AI Accelerator during inferencing. Support for various other frameworks is planned for future releases.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.17147827148438, 289.6532897949219, 544.6222534179688, 311.9052734375], "page": 8, "span": [0, 161], "__ref_s3_data": null}], "text": "Figure 5 on page 7 shows the seamless integration of AI into existing enterprises workloads on the IBM z16 while leveraging the underlying hardware capabilities.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.43448638916016, 481.01043701171875, 189.449951171875, 490.2646179199219], "page": 9, "span": [0, 29], "__ref_s3_data": null}], "text": "Figure 5 Seamless integration", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/7"}, {"prov": [{"bbox": [64.33121490478516, 438.1763000488281, 341.5532531738281, 453.7354431152344], "page": 9, "span": [0, 35], "__ref_s3_data": null}], "text": "What is Cloud Pak for Data on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0311737060547, 374.9359130859375, 547.132080078125, 421.4658508300781], "page": 9, "span": [0, 353], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data allows enterprises to simplify, unify, and automate the delivery of data and AI. It categorizes the activities within the journey to AI as four rungs of the AI Ladder: Collect, Organize, Analyze, and Infuse. 
For more information about each of the AI Ladder rungs, see Become Data Driven with IBM Z Infused Data Fabric , REDP-5680.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.93280029296875, 244.90695190429688, 545.086181640625, 363.6493225097656], "page": 9, "span": [0, 857], "__ref_s3_data": null}], "text": "CP4D on IBM Z provides enterprises with a resilient and secure private cloud platform. You can use it to create ML and AI models that may be included into modern intelligent applications. You also can use it to use and construct applications for mission-critical data. With CP4D on IBM Z, enterprises can lower data movement latency, cost inefficiencies, and potential security exposures. Enterprises can safely store and access their most important company data, and leverage their current infrastructure by using cutting-edge hybrid cloud applications. Enterprises can combine their current database applications without any rewrites, which results in reduced cost and complexity. 
Lastly, by using CP4D on IBM Z, enterprises can update their database infrastructure to benefit from easier management, a quicker time to value, and lower operating expenses.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [541.0161743164062, 27.93828010559082, 547.2838745117188, 37.731361389160156], "page": 9, "span": [0, 1], "__ref_s3_data": null}], "text": "7", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.47938537597656, 27.93828010559082, 72.8219985961914, 37.48422622680664], "page": 10, "span": [0, 1], "__ref_s3_data": null}], "text": "8", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [87.7466049194336, 27.745710372924805, 261.53851318359375, 37.175758361816406], "page": 10, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.06129455566406, 626.6174926757812, 547.2805786132812, 721.595947265625], "page": 10, "span": [0, 655], "__ref_s3_data": null}], "text": "Figure 6 shows a solution overview of CP4D. The infrastructure alternatives are shown at the bottom, and they include IBM Z and LinuxONE. They all leverage Red Hat OpenShift. Common Foundational Services come next, which offer clarity throughout the data and AI lifecycle, that is, from user access management to monitoring and service provisioning. A high-level view of the services is shown in the middle section. The services have several different capabilities that span the AI hierarchy. 
The platform can be expanded, and it offers a seamless user experience for all distinct personas across the AI lifecycle, from data gathering through AI infusion.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.57273864746094, 298.9583435058594, 264.11474609375, 308.77056884765625], "page": 10, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6 Solution overview of Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/8"}, {"prov": [{"bbox": [135.52293395996094, 276.2957763671875, 518.3954467773438, 286.7032775878906], "page": 10, "span": [0, 87], "__ref_s3_data": null}], "text": "We highlight the four main pillars that make IBM Z the correct infrastructure for CP4D:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.77655029296875, 259.4186096191406, 255.66061401367188, 269.49017333984375], "page": 10, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH Performance and Scale", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.59234619140625, 242.43878173828125, 257.89263916015625, 252.5904083251953], "page": 10, "span": [0, 37], "__ref_s3_data": null}], "text": "GLYPH Embedded Accelerators", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.40530395507812, 224.8186492919922, 263.65850830078125, 235.10702514648438], "page": 10, "span": [0, 44], "__ref_s3_data": null}], "text": "GLYPH Reliability and Availability", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4591064453125, 208.41940307617188, 269.5468444824219, 218.50930786132812], "page": 10, "span": [0, 40], "__ref_s3_data": null}], "text": "GLYPH Security and Governance.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.9900665283203, 102.40120697021484, 547.2814331054688, 196.7720184326172], "page": 10, "span": [0, 650], "__ref_s3_data": 
null}], "text": "From a performance perspective, CP4D on IBM Z provides your data and AI with high transaction processing and a powerful infrastructure. From the embedded accelerators perspective, CP4D on IBM Z can investigate each transaction thanks to a cutting-edge DL inference technology even in the most demanding, sensitive, and latency-prone real-time workloads. From a reliability perspective, CP4D on IBM Z provides high availability and resiliency. Lastly from the security perspective, CP4D on IBM Z is suitable for protecting sensitive data and AI models for enterprises in highly regulated industries or those industries that are worried about security.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 705.8773193359375, 519.7557983398438, 721.6634521484375], "page": 11, "span": [0, 57], "__ref_s3_data": null}], "text": "Cloud Pak for Data capabilities on IBM Z and IBM LinuxONE", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.6363067626953, 642.8818969726562, 544.5404052734375, 689.4312133789062], "page": 11, "span": [0, 291], "__ref_s3_data": null}], "text": "With CP4D on IBM Z and IBM LinuxONE, users can develop, train, and deploy AI and ML models. Users can accomplish this task by using the CP4D IBM Watsonfi Studio and IBM Watson Machine Learning (WLM) services. 
By using these two fundamental services, users can accomplish the following tasks:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.7655487060547, 626.25927734375, 341.2439270019531, 636.0855102539062], "page": 11, "span": [0, 58], "__ref_s3_data": null}], "text": "GLYPH Provision various containerized databases.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.609619140625, 608.9733276367188, 423.5125427246094, 619.2939453125], "page": 11, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH Explore, clean, shape, and alter data by using Data Refinery.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.44422912597656, 592.2996826171875, 454.5639343261719, 602.7785034179688], "page": 11, "span": [0, 87], "__ref_s3_data": null}], "text": "GLYPH Use project-specific data that is uploaded, or connect to distant data.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.59793090820312, 575.0346069335938, 331.7221984863281, 585.4097900390625], "page": 11, "span": [0, 56], "__ref_s3_data": null}], "text": "GLYPH Create Spark run times and applications.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.42922973632812, 558.2803344726562, 544.107177734375, 568.422607421875], "page": 11, "span": [0, 104], "__ref_s3_data": null}], "text": "GLYPH Create, build, evaluate, and deploy analytics and ML models with trust and transparency.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.64759826660156, 541.0048217773438, 499.1278381347656, 551.3433227539062], "page": 11, "span": [0, 92], "__ref_s3_data": null}], "text": "GLYPH Leverage the AI Integrated Accelerator for TensorFlow 2.7.2 and Snap ML 1.9.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.32513427734375, 507.2811279296875, 538.98681640625, 529.5091552734375], "page": 11, "span": [0, 116], 
"__ref_s3_data": null}], "text": "For more information about the specifics of these capabilities, see Capabilities on Linux on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 463.8086853027344, 250.52972412109375, 479.4306945800781], "page": 11, "span": [0, 21], "__ref_s3_data": null}], "text": "Open-source ecosystem", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.8963165283203, 376.8506164550781, 543.4259643554688, 447.3017272949219], "page": 11, "span": [0, 484], "__ref_s3_data": null}], "text": "These days, innovation and product development are not limited to closed doors within an organization. In any industry sector, the solutions include a mix of proprietary code addressing the core business solution that is supported or integrated into other software components from open source. In some cases, enterprises business solutions also are built from open-source community offerings. Thus, open-source software becomes an important ingredient in modern-day solution building.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9189453125, 271.0766906738281, 547.2396850585938, 365.0788269042969], "page": 11, "span": [0, 684], "__ref_s3_data": null}], "text": "IBM actively participates in various open-source communities as part of steering boards defining the roadmap of the community, and also in contributing code to make the community a better place for everyone to participate. Red Hat also actively participates in various open-source communities and makes extensive contributions. In open-source communities, although most open-source development happens on x86 / amd64 or the Intel architecture, the same open-source software is used by other architectures, such as IBM Power (ppc64le), IBM Z and IBM LInuxONE (s390x), ARM, and Sparc. 
So, the availability of an open-source ecosystem on any architecture is key and critical to business.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0531463623047, 213.07455444335938, 537.3534545898438, 259.5094299316406], "page": 11, "span": [0, 331], "__ref_s3_data": null}], "text": "On IBM Z and IBM LinuxONE (s390x) architecture, there is a huge open-source support ecosystem that ranges from operating systems such as Linux; application run times; cloud and container services; DevOps and automation; big data; observability; analytics; databases; and storage. The ecosystem on IBM Z and IBM LinuxONE is growing.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2856903076172, 167.24769592285156, 540.7626342773438, 201.58775329589844], "page": 11, "span": [0, 225], "__ref_s3_data": null}], "text": "IBM Z and IBM LinuxONE include much open-source software in their ecosystem. You can see the growing list of open-source software for IBM Z and LinuxONE at The Growing Ecosystem of Open-Source Software for IBM Z and LinuxONE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.05987548828125, 108.99290466308594, 544.6069946289062, 155.43284606933594], "page": 11, "span": [0, 326], "__ref_s3_data": null}], "text": "IBM Z and IBM LinuxONE are available to various communities to include support for s390x builds as part of their community's continuous integration and continuous delivery (CI/CD). 
Also, for open-source community developers, infrastructure resources are available on a no-charge basis through the IBM LinuxONE community cloud.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [541.1886596679688, 27.93828010559082, 547.2176513671875, 37.77804183959961], "page": 11, "span": [0, 1], "__ref_s3_data": null}], "text": "9", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.68538665771484, 27.93828010559082, 78.4020004272461, 37.55242919921875], "page": 12, "span": [0, 2], "__ref_s3_data": null}], "text": "10", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.686418533325195, 267.07440185546875, 37.19207000732422], "page": 12, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.9039306640625, 651.1361083984375, 547.312255859375, 721.5728759765625], "page": 12, "span": [0, 505], "__ref_s3_data": null}], "text": "CP4D includes a mix of open-source and proprietary data and AI runtime databases; open-source run times like Python; open-source data platforms like Anaconda; ML and DL frameworks like Pytorch and Tensorflow; and thousands of reusable Python packages. 
All of them are available and supported on s390x architecture to provide seamless parity with x86 architecture and a seamless experience for enterprise data scientists, architects, and data and AI solution developers on IBM Z and IBM LinuxONE platforms.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9044647216797, 581.2603759765625, 547.3501586914062, 639.3648681640625], "page": 12, "span": [0, 427], "__ref_s3_data": null}], "text": "Anaconda is one of the open-source data platforms that provide Python and R based data science ML frameworks; analytics and data visualization tools; and open-source data science tools and libraries like Conda, XGBoost, and SciKit-Learn. Anaconda runs natively on Linux on IBM Z and IBM LinuxONE, and on IBM z/OS Container Extensions (zcX) on z/OS. For more information, see Announcing Anaconda for Linux on IBM Z and LinuxONE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.92164611816406, 498.8492736816406, 546.230712890625, 569.2470703125], "page": 12, "span": [0, 501], "__ref_s3_data": null}], "text": "In addition to strong, open-source ecosystem support for application development on Linux and enterprise operating systems, a new generation of IBM Z and IBM LinuxONE servers (IBM z16\u2122) also have strong platform support, and AI acceleration capabilities that can be leveraged by open-source software to perform better on the server infrastructure. 
For example, the recently released CP4D 4.6 has Tensorflow and IBM SnapML frameworks that leverage the AI accelerators when running on an IBM z16 server.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.26947021484375, 465.1493225097656, 521.3436889648438, 487.5268249511719], "page": 12, "span": [0, 143], "__ref_s3_data": null}], "text": "So, to summarize, there is a huge, growing data and AI open source ecosystem that is supported and optimized on IBM Z and IBM LinuxONE servers.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.2679214477539, 421.844482421875, 191.19384765625, 437.6422119140625], "page": 12, "span": [0, 15], "__ref_s3_data": null}], "text": "Why AI on IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.91458129882812, 334.8140563964844, 547.2586059570312, 405.332275390625], "page": 12, "span": [0, 496], "__ref_s3_data": null}], "text": "Data and AI playing a major role in the modernization story to enable the digital transformation journey of every organization. Many organizations recognize the business value of infusing AI into their infrastructure. CP4D provides the cloud-native solution to put your data to work. With CP4D, all your data users can collaborate from a single, unified interface that supports many services that work together, including collecting data, organizing the data, analyzing the data, and infusing AI.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89390563964844, 241.02479553222656, 547.2825317382812, 323.4716491699219], "page": 12, "span": [0, 546], "__ref_s3_data": null}], "text": "Traditional ML models' power most of today's ML applications in business and among AI practitioners. CP4D supports traditional ML frameworks for training and inferencing, such as Scikit-learn, Snap ML, and XGBoost. 
Snap ML is a library that provides high-speed training and inferencing of ML models that leverage the AI accelerator while running on an IBM z16 (Linux on IBM Z). CP4D supports DL frameworks such as TensorFlow and PyTorch. TensorFlow is a DL framework that leverages the AI accelerator while running on an IBM z16 (Linux on IBM Z).", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0904541015625, 183.28155517578125, 547.3233032226562, 229.29551696777344], "page": 12, "span": [0, 288], "__ref_s3_data": null}], "text": "Figure 7 on page 11 provides an overview of the components that are supported on CP4D on IBM Z. You can leverage Watson Studio for model building, training, and validation, and WML for deployment of the model. Eventually, applications can use the AI inference endpoint to score the model.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.48258972167969, 428.9914245605469, 506.38671875, 438.489013671875], "page": 13, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/9"}, {"prov": [{"bbox": [136.08367919921875, 406.4902648925781, 492.9408264160156, 416.6065673828125], "page": 13, "span": [0, 75], "__ref_s3_data": null}], "text": "In summary, here are some of the reasons why you should choose AI on IBM Z:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6901397705078, 389.77813720703125, 413.4001770019531, 399.8421936035156], "page": 13, "span": [0, 75], "__ref_s3_data": null}], "text": "GLYPH World-class AI inference platform for enterprise workloads:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.4140167236328, 360.75909423828125, 526.1881103515625, 382.4400634765625], "page": 13, "span": [0, 89], "__ref_s3_data": null}], 
"text": "-Embedded accelerators: A centralized on-chip AI accelerator that is shared by all cores.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.0751190185547, 331.77947998046875, 547.2465209960938, 353.9866638183594], "page": 13, "span": [0, 113], "__ref_s3_data": null}], "text": "-Industry standard AI ecosystem: Many industry open-source data science frameworks are available on the platform.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.26991271972656, 302.2157287597656, 546.7576904296875, 324.52313232421875], "page": 13, "span": [0, 108], "__ref_s3_data": null}], "text": "-Seamlessly integrate AI into existing enterprise workload stacks: Train anywhere, and then deploy on IBM Z.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.60690307617188, 285.71148681640625, 490.0331726074219, 295.9700927734375], "page": 13, "span": [0, 88], "__ref_s3_data": null}], "text": "GLYPH Security: Encrypted memory, and improved trusted execution environments.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.39907836914062, 256.0257873535156, 547.2705078125, 278.7433776855469], "page": 13, "span": [0, 138], "__ref_s3_data": null}], "text": "GLYPH Sustainability: Reduce your energy consumption with real-time monitoring tools about the energy consumption of the system.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.38404083251953, 213.536376953125, 161.7474365234375, 228.9129180908203], "page": 13, "span": [0, 12], "__ref_s3_data": null}], "text": "AI use cases", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.81288146972656, 162.75877380371094, 533.9012451171875, 196.6822052001953], "page": 13, "span": [0, 262], "__ref_s3_data": null}], "text": "With billions of transactions per day in many of today's industries, it is key to get real-time insights about what is 
happening in your data. AI on the IBM Z stack understands these situations, and it delivers in-transaction inference in real time and at scale.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0598602294922, 104.60002136230469, 547.2466430664062, 150.73692321777344], "page": 13, "span": [0, 295], "__ref_s3_data": null}], "text": "Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.34182739257812, 70.34142303466797, 547.311279296875, 92.59844207763672], "page": 13, "span": [0, 146], "__ref_s3_data": null}], "text": "For insurance companies, a pressing use case would be claims processing. For markets and trading, clearing and settlement use cases are paramount.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.570030212402344], "page": 13, "span": [0, 2], "__ref_s3_data": null}], "text": "11", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.70201873779297, 27.93828010559082, 78.4020004272461, 37.41368103027344], "page": 14, "span": [0, 2], "__ref_s3_data": null}], "text": "12", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.684078216552734, 267.07440185546875, 37.171226501464844], "page": 14, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.13888549804688, 687.2786865234375, 525.1851196289062, 721.3170166015625], "page": 14, "span": [0, 184], "__ref_s3_data": null}], "text": "For the health care industry, medical image 
processing (such as MRIs and x-rays), skin cancer detection, and patient monitoring activities such as infant motion analysis, is important.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9561309814453, 641.2594604492188, 547.3113403320312, 675.2321166992188], "page": 14, "span": [0, 192], "__ref_s3_data": null}], "text": "For the airline industry, processes such as air traffic management, flight management systems, and flight maintenance predictions are use cases that are ideal candidates for using AI on IBM Z.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.2395782470703, 619.1561279296875, 413.4231262207031, 629.3563842773438], "page": 14, "span": [0, 63], "__ref_s3_data": null}], "text": "In the following sections, we describe the following use cases:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.60646057128906, 561.1961059570312, 545.5465698242188, 611.9253540039062], "page": 14, "span": [0, 306], "__ref_s3_data": null}], "text": "GLYPH \"Use case 1: Responsible AI augmented with risk and regulatory compliance\" on page 12 AI model lifecycle governance, risk management, and regulatory compliance are key to the success of the enterprises. It is imperative to adopt a typical AI model lifecycle to protect new end-to-end risks.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.66265869140625, 544.2186279296875, 402.90234375, 554.5841674804688], "page": 14, "span": [0, 71], "__ref_s3_data": null}], "text": "GLYPH \"Use case 2: Credit default risk assessment\" on page 22", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.4883270263672, 490.5600280761719, 547.2406005859375, 537.6137084960938], "page": 14, "span": [0, 295], "__ref_s3_data": null}], "text": "Core banking solutions running on IBM Z that are involved in processing inbound transactions need real-time fraud detection to prevent fraud. 
Other types of possible use cases might be credit risk analysis, anti-money laundering, loan approval, fraud detection in payments, and instant payments.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56887817382812, 474.1841735839844, 371.8055114746094, 484.5502624511719], "page": 14, "span": [0, 64], "__ref_s3_data": null}], "text": "GLYPH \"Use case 3: Clearing and settlement\" on page 25", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.5205841064453, 444.71429443359375, 541.1401977539062, 467.2469177246094], "page": 14, "span": [0, 151], "__ref_s3_data": null}], "text": "The use of AI can help to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6227569580078, 398.9003601074219, 534.64013671875, 437.9700012207031], "page": 14, "span": [0, 254], "__ref_s3_data": null}], "text": "GLYPH \"Use case 4: Remaining Useful Life of an aircraft engine\" on page 27 We describe how AI can help to avoid unplanned aircraft downtime by determining the remaining time or cycles that an aircraft engine is likely to operate before failure.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4189910888672, 370.28338623046875, 539.6531372070312, 392.7353515625], "page": 14, "span": [0, 112], "__ref_s3_data": null}], "text": "GLYPH \"Use case 5: AI-powered video analytics on an infant's motions for health prediction\" on page 30", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.01174926757812, 340.8392639160156, 547.24267578125, 363.40240478515625], "page": 14, "span": [0, 117], "__ref_s3_data": null}], "text": "In this section, we describe how AI can predict an infant's health conditions by monitoring real-time body movements.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": 
[{"bbox": [64.68285369873047, 278.4156799316406, 547.2564697265625, 313.73541259765625], "page": 14, "span": [0, 72], "__ref_s3_data": null}], "text": "Use case 1: Responsible AI augmented with risk and regulatory compliance", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.70286560058594, 216.10757446289062, 547.1787719726562, 262.7936706542969], "page": 14, "span": [0, 326], "__ref_s3_data": null}], "text": "Advancement in AI is changing the world, and organizations must adopt AI to embrace new challenges daily. Many enterprises see tremendous value in adopting AI and ML technologies while establishing organization trust in the models, underlying data, and the process to be followed. An AI model lifecycle can be a daunting task.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3160858154297, 181.8393096923828, 547.2424926757812, 204.10169982910156], "page": 14, "span": [0, 152], "__ref_s3_data": null}], "text": "How mature is your AI governance? 
In this section, we provide a use case demonstrating the trustworthiness of AI and its importance in daily monitoring.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.45439910888672, 149.6572723388672, 186.71859741210938, 162.72799682617188], "page": 14, "span": [0, 19], "__ref_s3_data": null}], "text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.2456512451172, 126.27838134765625, 508.98724365234375, 136.55023193359375], "page": 14, "span": [0, 83], "__ref_s3_data": null}], "text": "Here are the three main reasons why organizations struggle with the adoption of AI:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.55795288085938, 109.18142700195312, 293.96282958984375, 119.35371398925781], "page": 14, "span": [0, 48], "__ref_s3_data": null}], "text": "GLYPH Scaling with growing regulations", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5261993408203, 91.55158233642578, 435.7274475097656, 102.10991668701172], "page": 14, "span": [0, 80], "__ref_s3_data": null}], "text": "GLYPH Lack of confidence in operationalized AI (making responsible AI)", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4950714111328, 74.97781372070312, 466.1335144042969, 85.43976593017578], "page": 14, "span": [0, 85], "__ref_s3_data": null}], "text": "GLYPH Challenges around managing the risk throughout the entire AI workflow", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3238983154297, 709.180908203125, 324.71160888671875, 721.5367431640625], "page": 15, "span": [0, 32], "__ref_s3_data": null}], "text": "Scaling with growing regulations", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.0789337158203, 636.2792358398438, 536.3155517578125, 706.6138916015625], "page": 15, "span": [0, 476], "__ref_s3_data": null}], "text": 
"Laws and regulations in the data and AI space are accelerating, and many countries are proposing strict AI policies. Countries are monitoring adherence of these policies by the enterprises and imposing fines for any violations. Responding to these regulations are challenging global organizations where multiple regulations apply. For enterprises, it is important to adopt AI policies when there is change, and to validate explainable models to protect against discrimination.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 608.0890502929688, 223.54685974121094, 620.142333984375], "page": 15, "span": [0, 14], "__ref_s3_data": null}], "text": "Responsible AI", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.01739501953125, 558.7826538085938, 547.3283081054688, 605.0657348632812], "page": 15, "span": [0, 362], "__ref_s3_data": null}], "text": "Responsible AI protects against loss of data privacy, and reduced customer loyalty and trust. A data scientist cannot maximize accuracy and model performance above all other concerns. 
Practicing responsible AI is a best practice, and you must establish protection and validation to ensure that any models that are placed into production are fair and explainable.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.6778106689453, 531.367431640625, 364.1976623535156, 543.471435546875], "page": 15, "span": [0, 39], "__ref_s3_data": null}], "text": "Risks throughout the entire AI workflow", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.38307189941406, 517.2207641601562, 389.47918701171875, 528.18359375], "page": 15, "span": [0, 59], "__ref_s3_data": null}], "text": "Organizations need to mitigate risk of the following items:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.84344482421875, 501.0563659667969, 382.91455078125, 511.1645812988281], "page": 15, "span": [0, 69], "__ref_s3_data": null}], "text": "GLYPH Deciding not to use certain technologies or practices", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.61293029785156, 484.2521667480469, 450.990234375, 494.5205993652344], "page": 15, "span": [0, 80], "__ref_s3_data": null}], "text": "GLYPH Using personal information when needed and with a user's consent", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7186737060547, 467.2592468261719, 366.2126770019531, 477.1536865234375], "page": 15, "span": [0, 63], "__ref_s3_data": null}], "text": "GLYPH Ensuring automated decisions are free from bias", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.6738739013672, 450.26641845703125, 462.3146057128906, 460.5806579589844], "page": 15, "span": [0, 84], "__ref_s3_data": null}], "text": "GLYPH Customer confidence by providing explanations for business decisions", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.5981903076172, 433.1468200683594, 386.45635986328125, 
443.06573486328125], "page": 15, "span": [0, 68], "__ref_s3_data": null}], "text": "GLYPH Fraud to the organization and to customer's accounts", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56326293945312, 415.8823547363281, 331.2491149902344, 425.7867431640625], "page": 15, "span": [0, 56], "__ref_s3_data": null}], "text": "GLYPH Delays in putting models into production", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.1046905517578, 370.3006286621094, 547.186767578125, 404.339111328125], "page": 15, "span": [0, 232], "__ref_s3_data": null}], "text": "In fact, in a recent survey, these concerns were echoed by real AI adopters when asked what aspects of trust are most important to them. Although explaining how AI decides is the primary concern, all of these concerns are important.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.93702697753906, 300.23199462890625, 547.24658203125, 358.55194091796875], "page": 15, "span": [0, 431], "__ref_s3_data": null}], "text": "The key point here is that risk exists throughout the entire AI lifecycle starting with the underlying data and the business justification behind the \"why\" of the project and continuing into production. Without a formalized process, there is no way to mitigate these risks to unlock the scale that is required to make automated decisions profitable. 
With these decisions, the business can operate proactively instead of reactively.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.52984619140625], "page": 15, "span": [0, 2], "__ref_s3_data": null}], "text": "13", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.66812133789062, 27.93828010559082, 78.4020004272461, 37.513004302978516], "page": 16, "span": [0, 2], "__ref_s3_data": null}], "text": "14", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.6509952545166, 267.07440185546875, 37.20136260986328], "page": 16, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.99598693847656, 675.0350341796875, 547.3073120117188, 721.3980712890625], "page": 16, "span": [0, 309], "__ref_s3_data": null}], "text": "For example, a business can start testing a model before production for fairness metrics. For this task, enterprises need an end-to-end workflow with approvals to mitigate these risks and increase the scale of AI investments, as shown in Figure 8, which presents a typical AI model lifecycle in an enterprise.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.23173522949219, 450.5171203613281, 206.54298400878906, 459.9090881347656], "page": 16, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 8 Typical AI model lifecycle", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/10"}, {"prov": [{"bbox": [135.95455932617188, 367.7792053222656, 540.1202392578125, 437.9422607421875], "page": 16, "span": [0, 464], "__ref_s3_data": null}], "text": "Due to regulations, more stakeholders adopt the typical AI model lifecycle to protect their brand from new end-to-end risks. 
To ensure various aspects of both regulatory compliance and security, the personas that must be involved include the chief financial officer (CFO), chief marketing officer (CMO), chief data officer (CDO), HR, and chief regulatory officer (CRO), along with the data engineers, data scientists, and business analysts, who build AI workflows.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.70768737792969, 335.0872802734375, 279.20489501953125, 348.1524353027344], "page": 16, "span": [0, 33], "__ref_s3_data": null}], "text": "IBM governance solution for IBM Z", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.95175170898438, 299.73846435546875, 540.66015625, 322.0162048339844], "page": 16, "span": [0, 112], "__ref_s3_data": null}], "text": "AI model lifecycle governance, risk management, and regulatory compliance are key to the success of enterprises.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.83395385742188, 181.78021240234375, 547.3551025390625, 288.1084899902344], "page": 16, "span": [0, 755], "__ref_s3_data": null}], "text": "AI governance is a comprehensive framework that uses a set of automated processes, methodologies, and tools to manage an organization's use of AI. Consistent principles guiding the design, development, deployment, and monitoring of models are critical in driving responsible and trustworthy AI. AI governance includes processes that trace and record the origin of data, models (including associated metadata), and pipelines for audits. The details of entry should include the techniques that trained each model, the hyperparameters that were used, and the metrics from testing phases. 
These details provide increased transparency into the model's behavior throughout the lifecycle, the data that was influential in its development, and the possible risks.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.96249389648438, 123.76123809814453, 543.354248046875, 169.8548126220703], "page": 16, "span": [0, 292], "__ref_s3_data": null}], "text": "In a world where trust, transparency and explainable AI matters, every organization wants compliance along with the comfort of understanding how analytic insights and decisions are made. The following sections describe some of the principles and organizational requirements for AI governance.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.5681610107422, 710.160400390625, 249.01470947265625, 721.4048461914062], "page": 17, "span": [0, 20], "__ref_s3_data": null}], "text": "Lifecycle governance", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.06361389160156, 672.2786865234375, 544.0435791015625, 706.5530395507812], "page": 17, "span": [0, 202], "__ref_s3_data": null}], "text": "Lifecycle governance helps you manage your business information throughout its lifecycle, that is, from creation to deletion. 
IBM AI governance addresses the problems that challenge records managements:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.78414916992188, 655.0160522460938, 517.3616333007812, 665.3117065429688], "page": 17, "span": [0, 97], "__ref_s3_data": null}], "text": "GLYPH Monitor, catalog, and govern AI models from anywhere throughout the AI lifecycle.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.66246032714844, 637.6724243164062, 428.482666015625, 647.8308715820312], "page": 17, "span": [0, 77], "__ref_s3_data": null}], "text": "GLYPH Automate the capture of model metadata for report generation.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.61790466308594, 621.2044067382812, 352.8333740234375, 631.1478271484375], "page": 17, "span": [0, 62], "__ref_s3_data": null}], "text": "GLYPH Drive transparent and explainable AI at scale.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.47805786132812, 604.2398681640625, 531.1472778320312, 614.4699096679688], "page": 17, "span": [0, 103], "__ref_s3_data": null}], "text": "GLYPH Increase accuracy of predictions by identifying how AI is used and where it is lagging.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.8000030517578, 580.1421508789062, 231.87411499023438, 591.301513671875], "page": 17, "span": [0, 15], "__ref_s3_data": null}], "text": "Risk management", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.37351989746094, 553.9462890625, 544.0723266601562, 575.8314819335938], "page": 17, "span": [0, 130], "__ref_s3_data": null}], "text": "Risk management is used in IBM AI governance to identify, manage, monitor, and report on risk and compliance initiatives at scale:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.86279296875, 537.0994262695312, 497.7820739746094, 547.1629028320312], 
"page": 17, "span": [0, 89], "__ref_s3_data": null}], "text": "GLYPH Automate facts and workflow management to comply with business standards.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.80929565429688, 520.2988891601562, 455.0130310058594, 530.4574584960938], "page": 17, "span": [0, 82], "__ref_s3_data": null}], "text": "GLYPH Use dynamic dashboards for clear and concise customizable results.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.7198486328125, 502.8976135253906, 440.54815673828125, 513.4384155273438], "page": 17, "span": [0, 79], "__ref_s3_data": null}], "text": "GLYPH Enhanced collaboration across multiple regions and geographies.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.74740600585938, 479.4071350097656, 258.4198303222656, 490.6507568359375], "page": 17, "span": [0, 21], "__ref_s3_data": null}], "text": "Regulatory compliance", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.20217895507812, 428.6148681640625, 547.2466430664062, 475.2479248046875], "page": 17, "span": [0, 366], "__ref_s3_data": null}], "text": "Regulatory compliance is a set of rules that organizations must follow to protect sensitive information and ensure human safety. 
Any business that works with digital assets, consumer data, health regulations, employee safety, and private communications is subject to regulatory compliance.$^{3}$ The IBM AI governance solution for IBM Z includes the following tasks:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.8566436767578, 412.22723388671875, 433.3389892578125, 422.5816955566406], "page": 17, "span": [0, 80], "__ref_s3_data": null}], "text": "GLYPH Help adhere to external AI regulations for audit and compliance.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.61163330078125, 395.0362243652344, 465.02978515625, 405.4002990722656], "page": 17, "span": [0, 88], "__ref_s3_data": null}], "text": "GLYPH Convert external AI regulations into policies for automatic enforcement.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.56532287597656, 378.1607971191406, 503.326171875, 388.5216064453125], "page": 17, "span": [0, 93], "__ref_s3_data": null}], "text": "GLYPH Use dynamic dashboards for compliance status across policies and regulations.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.35816955566406, 319.4354248046875, 547.2515869140625, 365.9399719238281], "page": 17, "span": [0, 271], "__ref_s3_data": null}], "text": "Enterprises can develop AI models and deploy them by using IBM Watson Studio or WML on CP4D on Red Hat OpenShift on a virtual machine that is based on IBM z/VM or Red Hat Enterprise Linux KVM on IBM Z. 
AI governance on IBM LinuxONE is supported in the following two ways:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.70884704589844, 291.2804870605469, 526.8416137695312, 313.489013671875], "page": 17, "span": [0, 119], "__ref_s3_data": null}], "text": "GLYPH Monitor the AI models with Watson OpenScale on CP4D on Red Hat OpenShift on a virtual machine on IBM Z.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.62962341308594, 225.8667449951172, 541.8055419921875, 283.9451904296875], "page": 17, "span": [0, 413], "__ref_s3_data": null}], "text": "GLYPH Enterprises can develop AI models by creating and training models by using Watson Studio and development tools such as Jupyter Notebook or JupyterLab, and then deploying the model onto WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z. Then, these enterprises can achieve end-end AI governance by running AI Factsheets, IBM Watson OpenScale, and IBM Watson OpenPagesfi on CP4D on x86.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.40660095214844, 204.2818603515625, 512.4911499023438, 214.43418884277344], "page": 17, "span": [0, 82], "__ref_s3_data": null}], "text": "Figure 9 on page 16 shows the end-to-end flow for a remote AI governance solution.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0848846435547, 56.93043899536133, 418.2659606933594, 66.85415649414062], "page": 17, "span": [0, 75], "__ref_s3_data": null}], "text": "$^{3 }$https://www.proofpoint.com/us/threat-reference/regulatory-compliance", "type": "footnote", "name": "Footnote", "font": null}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.63853454589844], "page": 17, "span": [0, 2], "__ref_s3_data": null}], "text": "15", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.46823120117188, 488.9634094238281, 295.7712097167969, 498.658203125], "page": 
18, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 9 Remote AI governance solution end-to-end flow", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/11"}, {"prov": [{"bbox": [135.7501678466797, 466.32562255859375, 438.0164794921875, 476.6277160644531], "page": 18, "span": [0, 66], "__ref_s3_data": null}], "text": "To achieve end-to-end AI governance, complete the following steps:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.78916931152344, 437.65087890625, 541.7039184570312, 460.17938232421875], "page": 18, "span": [0, 96], "__ref_s3_data": null}], "text": "1. Create a model entry in IBM OpenPages by using CP4D on a x86 platform, as shown in Figure 10.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.23847198486328, 114.00309753417969, 279.3128356933594, 123.19122314453125], "page": 18, "span": [0, 49], "__ref_s3_data": null}], "text": "Figure 10 Creating a model entry in IBM OpenPages", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/12"}, {"prov": [{"bbox": [64.66849517822266, 27.93828010559082, 78.4020004272461, 37.487548828125], "page": 18, "span": [0, 2], "__ref_s3_data": null}], "text": "16", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.65258026123047, 267.07440185546875, 37.216922760009766], "page": 18, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.2081298828125, 686.9100952148438, 542.9114379882812, 721.3231811523438], "page": 19, "span": [0, 192], "__ref_s3_data": null}], "text": "2. 
Train a model by using Watson Studio and by using development tools such as Jupyter Notebook or JupyterLab on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 11.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.40596008300781, 366.4603576660156, 290.68634033203125, 375.5119934082031], "page": 19, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 11 Training an AI model by using Watson Studio", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/13"}, {"prov": [{"bbox": [136.1393585205078, 331.91552734375, 547.2686767578125, 353.865966796875], "page": 19, "span": [0, 115], "__ref_s3_data": null}], "text": "3. Deploy the model by using WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, as shown in Figure 12.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.62543487548828, 56.46229553222656, 351.1957092285156, 65.96401977539062], "page": 19, "span": [0, 66], "__ref_s3_data": null}], "text": "Figure 12 Deploying an AI model by using WML on Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/14"}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.6064338684082], "page": 19, "span": [0, 2], "__ref_s3_data": null}], "text": "17", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.96473693847656, 674.9818725585938, 547.24560546875, 721.3460083007812], "page": 20, "span": [0, 338], "__ref_s3_data": null}], "text": "4. Track the external model lifecycle by browsing through the Catalogs/Platform assets catalog by using AI Factsheets and OpenPages while using CP4D on an x86 platform, as shown in Figure 13. 
The external model (deployed on CP4D on Red Hat OpenShift on a virtual machine on IBM Z) is saved as a platform asset catalog on the x86 platform.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.30557250976562, 394.5406799316406, 242.96470642089844, 403.65936279296875], "page": 20, "span": [0, 24], "__ref_s3_data": null}], "text": "Figure 13 External model", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/15"}, {"prov": [{"bbox": [150.36965942382812, 359.5168151855469, 547.290283203125, 382.07330322265625], "page": 20, "span": [0, 127], "__ref_s3_data": null}], "text": "You can track the model through each stage of the model lifecycle, as shown in Figure 14, by using AI Factsheets and OpenPages.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.4922866821289, 72.41610717773438, 187.39756774902344, 81.66903686523438], "page": 20, "span": [0, 28], "__ref_s3_data": null}], "text": "Figure 14 Tracking the model", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/16"}, {"prov": [{"bbox": [64.6649169921875, 27.93828010559082, 78.4020004272461, 37.509376525878906], "page": 20, "span": [0, 2], "__ref_s3_data": null}], "text": "18", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.659908294677734, 267.07440185546875, 37.17069625854492], "page": 20, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [150.21810913085938, 698.8858642578125, 547.2222290039062, 721.4878540039062], "page": 21, "span": [0, 122], "__ref_s3_data": null}], "text": "You can see that the model facts are tracked and synchronized to IBM OpenPages for risk management, as shown in Figure 15.", "type": "paragraph", "name": "Text", "font": null}, {"prov": 
[{"bbox": [64.2066421508789, 367.82769775390625, 450.17156982421875, 377.36688232421875], "page": 21, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/17"}, {"prov": [{"bbox": [536.0999755859375, 27.93828010559082, 547.2591552734375, 37.720638275146484], "page": 21, "span": [0, 2], "__ref_s3_data": null}], "text": "19", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.3836669921875, 699.1000366210938, 525.693115234375, 721.4442749023438], "page": 22, "span": [0, 94], "__ref_s3_data": null}], "text": "5. Create an external model by using IBM OpenScale on the x86 platform, as shown in Figure 16.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.4760971069336, 398.4433898925781, 295.7671203613281, 407.997802734375], "page": 22, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 16 Creating an external model on an x86 platform", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/18"}, {"prov": [{"bbox": [135.8937530517578, 339.7777404785156, 547.329345703125, 386.164794921875], "page": 22, "span": [0, 356], "__ref_s3_data": null}], "text": "IBM OpenScale provides a comprehensive dashboard that tracks fairness, quality monitoring, drift, and explainability of a model. Fairness determines whether your model produces biased outcomes. Quality determines how well your model predicts outcomes. Drift is the degradation of predictive performance over time. 
A sample is shown in Figure 17 on page 21.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.96902084350586, 27.93828010559082, 78.4020004272461, 37.76066970825195], "page": 22, "span": [0, 2], "__ref_s3_data": null}], "text": "20", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.654016494750977, 267.07440185546875, 37.2003059387207], "page": 22, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.54642486572266, 428.96380615234375, 386.5538024902344, 438.4689636230469], "page": 23, "span": [0, 76], "__ref_s3_data": null}], "text": "Figure 17 IBM OpenScale dashboard that is used to monitor the external model", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/19"}, {"prov": [{"bbox": [135.88458251953125, 358.59173583984375, 547.2167358398438, 416.6777038574219], "page": 23, "span": [0, 340], "__ref_s3_data": null}], "text": "You developed and deployed the AI model by using Watson Studio, WML on CP4D on Red Hat OpenShift on a virtual machine on IBM Z, and end-to-end AI model governance by leveraging AI Factsheets, OpenScale, and OpenPages on CP4D on a x86 platform. 
Figure 18 shows end-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.40601348876953, 57.904937744140625, 507.6827087402344, 67.22319793701172], "page": 23, "span": [0, 103], "__ref_s3_data": null}], "text": "Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/20"}, {"prov": [{"bbox": [535.4241333007812, 27.93828010559082, 547.2591552734375, 37.61942672729492], "page": 23, "span": [0, 2], "__ref_s3_data": null}], "text": "21", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.80000305175781, 706.0162963867188, 389.3597717285156, 721.7960205078125], "page": 24, "span": [0, 42], "__ref_s3_data": null}], "text": "Use case 2: Credit default risk assessment", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.03933715820312, 655.13671875, 547.2247314453125, 689.2626953125], "page": 24, "span": [0, 265], "__ref_s3_data": null}], "text": "In today's world, many individuals or businesses seeking loans to meet their growing business needs often look to financial institutions. 
Financial institutions can offer loans to individuals or businesses and charge interest based on the current market situations.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.6015853881836, 622.3898315429688, 186.71859741210938, 635.3670654296875], "page": 24, "span": [0, 19], "__ref_s3_data": null}], "text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.15077209472656, 563.016357421875, 547.2955322265625, 609.4694213867188], "page": 24, "span": [0, 366], "__ref_s3_data": null}], "text": "Financial institutions must make an accurate decision about whether to sanction a loan or not, and judging the likelihood of default is the difference between a successful and unsuccessful loan portfolio. In a traditional scenario, an experienced banker can judge someone's likelihood of default, but that is not an efficient method for judgment as a business grows.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.78274536132812, 530.6572875976562, 341.3221740722656, 543.4954833984375], "page": 24, "span": [0, 45], "__ref_s3_data": null}], "text": "Predictions of credit default risk assessment", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.8963165283203, 434.8458251953125, 547.2635498046875, 517.2132568359375], "page": 24, "span": [0, 579], "__ref_s3_data": null}], "text": "In the modern world, growing business institutions can no longer rely on only experienced bankers to decide whether to sanction a loan knowing that there is a probability that the borrower might default on their loans. 
A better choice is to rely on technological advancements that can help with reasoning based on facts, such as leveraging credit risk modeling techniques to process the historical data of past borrowers to understand their credit behavior and make a more informed decision about whether to lend money, how much money, and decide on the tenure to close the loan.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3209991455078, 389.1979064941406, 547.1471557617188, 423.1103210449219], "page": 24, "span": [0, 252], "__ref_s3_data": null}], "text": "Financial institutions can leverage AI solutions by using ML techniques to predict the credit risk. Applying AI to credit risk modeling techniques can benefit institutions in decision-making, and thus can help better manage the exposure to credit risk.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1620635986328, 331.1011047363281, 547.2008666992188, 377.3158874511719], "page": 24, "span": [0, 321], "__ref_s3_data": null}], "text": "Figure 19 on page 23 shows a sample architecture about how to design and develop an AI model for credit risk assessment on IBM Z. 
An IBM WebSpherefi Application Server is used for handling in-bound transactions, and CP4D is used for AI model lifecycle management that includes building, training, and deploying the model.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.99784469604492, 27.93828010559082, 78.4020004272461, 37.64631652832031], "page": 24, "span": [0, 2], "__ref_s3_data": null}], "text": "22", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.69827651977539, 267.07440185546875, 37.1955451965332], "page": 24, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.55125427246094, 439.1292724609375, 395.7654113769531, 448.4423828125], "page": 25, "span": [0, 82], "__ref_s3_data": null}], "text": "Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/21"}, {"prov": [{"bbox": [135.81259155273438, 380.48779296875, 547.34521484375, 426.5666809082031], "page": 25, "span": [0, 317], "__ref_s3_data": null}], "text": "A data scientist can leverage Watson Studio to develop and train an AI model and WML to deploy and score the model. In this sample architecture, the WML Python run time leverages the ML framework, IBM Snap Machine Learning (Snap ML), for scoring, can leverage an integrated AI accelerator at the time of model import.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.68914794921875, 310.3835754394531, 545.5831909179688, 368.8029479980469], "page": 25, "span": [0, 424], "__ref_s3_data": null}], "text": "Then, the banking loan approval team can send a loan applicant request to the IBM WebSphere Application Server, which can make a request to the AI inference endpoint. 
The AI inference engine scores the transaction and sends the result back to the loan approval team. Based on the results, the approval team can decide on whether to approve a loan or not, and also decide how much they can lend, timelines, and other factors.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.96060180664062, 264.4761962890625, 528.6572875976562, 298.59375], "page": 25, "span": [0, 232], "__ref_s3_data": null}], "text": "The transaction system that is shown in Figure 19 uses IBM WebSphere Liberty as an application server, but you also can use an IBM Open Libertyfi application server or any application server that can send RESTful API communications.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.14891052246094, 182.34982299804688, 547.310546875, 252.74313354492188], "page": 25, "span": [0, 498], "__ref_s3_data": null}], "text": "Models are frequently developed and tested in many platforms and languages, such as Python, Scala, R, and Go. Models can leverage ML frameworks like scikit-learn, Snap ML, or XGBoost, or DL frameworks like TensorFlow or PyTorch. Training a model can be done on any platform if you have enough computing power for complex models, but moving that model into production requires careful testing to ensure that transactions are not delayed, especially if you plan to run the model within a transaction.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.84275817871094, 148.16622924804688, 547.2376708984375, 170.75857543945312], "page": 25, "span": [0, 173], "__ref_s3_data": null}], "text": "We showed how IBM Z enable customers to use AI frameworks to detect credit risk. 
Now, we look at how you can leverage CP4D and TensorFlow on IBM Z to detect the credit risk.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [535.4790649414062, 27.93828010559082, 547.2591552734375, 37.54842758178711], "page": 25, "span": [0, 2], "__ref_s3_data": null}], "text": "23", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.0537338256836, 27.93828010559082, 78.4020004272461, 37.74729919433594], "page": 26, "span": [0, 2], "__ref_s3_data": null}], "text": "24", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.697975158691406, 267.07440185546875, 37.23467254638672], "page": 26, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.27874755859375, 710.9722900390625, 489.5701599121094, 721.4159545898438], "page": 26, "span": [0, 80], "__ref_s3_data": null}], "text": "Figure 20 shows an architecture for predicting credit risk by using DL on IBM Z.", "type": "caption", "name": "Caption", "font": null}, {"prov": [{"bbox": [64.493896484375, 421.176025390625, 344.1890563964844, 430.731689453125], "page": 26, "span": [0, 70], "__ref_s3_data": null}], "text": "Figure 20 Architecture for credit risk prediction by using DL on IBM Z", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/22"}, {"prov": [{"bbox": [136.09727478027344, 362.354736328125, 534.5686645507812, 408.55877685546875], "page": 26, "span": [0, 335], "__ref_s3_data": null}], "text": "Data scientists can start creating and training a DL AI model by using a Jupyter Notebook instance and Watson Studio. Then, they can deploy the model by using WML on CP4D running on IBM Z, which provides an endpoint. 
Other applications, including the IBM WebSphere server, can produce credit risk results by using the model's endpoint.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.12158203125, 328.7194519042969, 547.2158813476562, 350.3298645019531], "page": 26, "span": [0, 108], "__ref_s3_data": null}], "text": "In summary, here are some considerations for developing real-time AI models, such as credit risk assessment:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.60023498535156, 311.7396545410156, 522.9054565429688, 322.047607421875], "page": 26, "span": [0, 102], "__ref_s3_data": null}], "text": "GLYPH A preference for in-platform run times of the model, such as faster execution results.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.54747009277344, 294.29937744140625, 445.5369873046875, 304.6208801269531], "page": 26, "span": [0, 81], "__ref_s3_data": null}], "text": "GLYPH Less overhead in the end-to-end flows might improve scoring time.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.34133911132812, 265.28106689453125, 547.3232421875, 288.05682373046875], "page": 26, "span": [0, 168], "__ref_s3_data": null}], "text": "GLYPH If you are using models that are not deployable, CP4D offers a custom Python run time to build your own stack if they are not available on the platform.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.48162841796875, 236.74082946777344, 541.7804565429688, 258.6591491699219], "page": 26, "span": [0, 115], "__ref_s3_data": null}], "text": "GLYPH AI inferencing based on ML or DL models can increase the accuracy of better credit risk assessment.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.3498992919922, 195.72576904296875, 531.8067626953125, 230.00747680664062], "page": 26, "span": [0, 234], "__ref_s3_data": null}], "text": "GLYPH Using IBM z16 and 
on-chip AI acceleration with the Telum chip that is embedded with regular Integrated Facility for Linux (IFLs) provides an execution speed for your transactions that cannot be achieved by other means.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.78247833251953, 705.360595703125, 338.5379638671875, 721.9246826171875], "page": 27, "span": [0, 35], "__ref_s3_data": null}], "text": "Use case 3: Clearing and settlement", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9156036376953, 630.8539428710938, 539.5654907226562, 689.4520263671875], "page": 27, "span": [0, 433], "__ref_s3_data": null}], "text": "Clearing and settlements involve banks or financial institutions sending and receiving wire transfers by using secure interbank payments networks that can clear or settle numerous transactions. When an individual or business entity initiates a wire transfer, clearing begins the fund delivery process. Banks can begin the settlement phase either immediately after clearing takes place or later, mostly at the end of the business day.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.50138092041016, 598.3314208984375, 179.53228759765625, 611.3511352539062], "page": 27, "span": [0, 18], "__ref_s3_data": null}], "text": "Industry challenge", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.24351501464844, 563.0052490234375, 538.4359130859375, 585.32373046875], "page": 27, "span": [0, 184], "__ref_s3_data": null}], "text": "Banks and financial institutions must deal with high-risk transactions that can lead to loss. 
Moreover, these transactions can lead to regulatory violations and extra compliance costs.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.65296936035156, 530.1951904296875, 266.077880859375, 543.4844970703125], "page": 27, "span": [0, 32], "__ref_s3_data": null}], "text": "Clearing and settlement solution", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.9798126220703, 471.2395324707031, 545.6968383789062, 517.1358032226562], "page": 27, "span": [0, 277], "__ref_s3_data": null}], "text": "Use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.96380615234375, 389.0691223144531, 544.662109375, 459.2572937011719], "page": 27, "span": [0, 499], "__ref_s3_data": null}], "text": "In financial institutions, finding which financial transactions are legitimate and which transactions are fraudulent is of paramount importance. In this section, we go through a use case where we use AI to predict which trades or transactions have high risk exposures, and propose solutions for a more efficient settlement process. The expedited remediation of questionable transactions can prevent costly consequences, regulatory violations, and negative business impacts to financial institutions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.98394775390625, 319.3011169433594, 543.3472900390625, 377.23699951171875], "page": 27, "span": [0, 430], "__ref_s3_data": null}], "text": "The goal is to predict in real time whether the transaction being processed might be a fraudulent transaction or not. To achieve this goal, we build an ML model that can do this prediction for the financial institution. 
Because there would be many transactions being processed at any point by the financial institution, it is important to perform this prediction of fraudulent transactions in near-real time in a few milliseconds.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0880584716797, 261.0608825683594, 547.2335815429688, 307.2639465332031], "page": 27, "span": [0, 298], "__ref_s3_data": null}], "text": "One possible solution is to build and train a TensorFlow based DL model that learns from the historical data and predicts the fraudulent transactions. CP4D on IBM Z and IBM LinuxONE is a suitable product where this task can be achieved and the model deployed, and coming up with a serving endpoint.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [535.4534301757812, 27.93828010559082, 547.2591552734375, 37.55341339111328], "page": 27, "span": [0, 2], "__ref_s3_data": null}], "text": "25", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.914608001708984, 27.93828010559082, 78.4020004272461, 37.52546691894531], "page": 28, "span": [0, 2], "__ref_s3_data": null}], "text": "26", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.69200897216797, 267.07440185546875, 37.19856643676758], "page": 28, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [136.05018615722656, 699.2785034179688, 537.352294921875, 721.2307739257812], "page": 28, "span": [0, 146], "__ref_s3_data": null}], "text": "Figure 21 provides a high-level diagram of a clearing and settlement use case for financial transactions that uses CP4D on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.39627838134766, 467.5994567871094, 459.98809814453125, 477.4582214355469], "page": 28, "span": [0, 97], "__ref_s3_data": 
null}], "text": "Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/23"}, {"prov": [{"bbox": [136.23934936523438, 445.1138916015625, 353.37115478515625, 455.2171325683594], "page": 28, "span": [0, 50], "__ref_s3_data": null}], "text": "Here are the steps of the high-level process flow:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.8000030517578, 415.7784729003906, 524.740966796875, 438.1050720214844], "page": 28, "span": [0, 135], "__ref_s3_data": null}], "text": "1. Create a connection to a database (for example, an IBM Db2fi database) where the historical data will be used for ML model building.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.04005432128906, 387.2192687988281, 542.9837646484375, 409.4548034667969], "page": 28, "span": [0, 103], "__ref_s3_data": null}], "text": "2. Read the data from the database and prepare the data for AI by using the Data Refinery tool in CP4D.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.2370147705078, 345.5196533203125, 545.7424926757812, 380.059326171875], "page": 28, "span": [0, 188], "__ref_s3_data": null}], "text": "3. A Jupyter Notebook or JupyterLab IDE that is provided by the Watson Studio component in CP4D helps us build and train the AI model. The trained model can be saved into a WML repository.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.07083129882812, 328.86895751953125, 468.5547790527344, 339.33837890625], "page": 28, "span": [0, 71], "__ref_s3_data": null}], "text": "4. 
Deploy the saved model into a deployment space for batch deployment.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.43487548828125, 311.5809631347656, 417.2825622558594, 322.1788330078125], "page": 28, "span": [0, 62], "__ref_s3_data": null}], "text": "5. Create a batch deployment by using any of these interfaces:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.6441192626953, 294.92474365234375, 460.12939453125, 305.1920471191406], "page": 28, "span": [0, 67], "__ref_s3_data": null}], "text": "a. Watson Studio user interface from an Analytics deployment space.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.69277954101562, 277.7862243652344, 251.6896209716797, 288.1532287597656], "page": 28, "span": [0, 21], "__ref_s3_data": null}], "text": "b. WML Python client.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.7314910888672, 261.2212829589844, 244.95565795898438, 271.8070373535156], "page": 28, "span": [0, 17], "__ref_s3_data": null}], "text": "c. WML REST APIs.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.27818298339844, 243.74349975585938, 418.506591796875, 253.9542694091797], "page": 28, "span": [0, 61], "__ref_s3_data": null}], "text": "6. A hardware configuration can be chosen for the deployment.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3677978515625, 214.9309539794922, 545.685791015625, 237.33485412597656], "page": 28, "span": [0, 160], "__ref_s3_data": null}], "text": "7. 
A batch deployment processes input data from a file, data connection, or connected data in a storage bucket, and writes the output to a selected destination.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.3165283203125, 185.6129150390625, 510.0397033691406, 207.9795379638672], "page": 28, "span": [0, 99], "__ref_s3_data": null}], "text": "8. One way to run batch deployment to predict or score is to create and run a batch deployment job.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.2800750732422, 168.84835815429688, 270.1285705566406, 179.39981079101562], "page": 28, "span": [0, 30], "__ref_s3_data": null}], "text": "9. Provide an input data type:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.57568359375, 151.5411834716797, 374.55621337890625, 161.9819793701172], "page": 28, "span": [0, 50], "__ref_s3_data": null}], "text": "a. Inline data for entering a JSON format payload.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.6676025390625, 135.0714874267578, 492.3292236328125, 145.2565460205078], "page": 28, "span": [0, 78], "__ref_s3_data": null}], "text": "b. 
Select Data asset , click Select data source , and then specify your asset.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.80201721191406, 117.79898834228516, 471.90997314453125, 127.81742095947266], "page": 28, "span": [0, 75], "__ref_s3_data": null}], "text": "10.The output data type can be a new output file or a connected data asset.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.80201721191406, 89.26384735107422, 546.2705688476562, 111.39698028564453], "page": 28, "span": [0, 93], "__ref_s3_data": null}], "text": "11.A Kubernetes admin can change the maximum number of concurrent batch jobs that can be run.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [136.80201721191406, 59.67621612548828, 531.2200927734375, 81.98482513427734], "page": 28, "span": [0, 98], "__ref_s3_data": null}], "text": "12.Get the deployment endpoint URL. For more information, see Getting the deployment endpoint URL.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.38957977294922, 708.2412719726562, 124.10479736328125, 721.2948608398438], "page": 29, "span": [0, 7], "__ref_s3_data": null}], "text": "Summary", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.63417053222656, 660.6920776367188, 532.9658813476562, 695.446533203125], "page": 29, "span": [0, 223], "__ref_s3_data": null}], "text": "With this use case, we attempted to demonstrate how to predict, in real time, whether the transaction that is being processed might be a fraudulent transaction or not. 
By using the method, you have the following advantages:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.5712432861328, 643.5092163085938, 374.5103454589844, 654.0914306640625], "page": 29, "span": [0, 63], "__ref_s3_data": null}], "text": "GLYPH No Impact to SLAs and the batch process window.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.46438598632812, 626.6151123046875, 508.5062255859375, 637.4598388671875], "page": 29, "span": [0, 97], "__ref_s3_data": null}], "text": "GLYPH Proactively stop losses, and lower operational, regulatory, and compliance costs.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.59495544433594, 598.0394287109375, 533.5338134765625, 620.7293701171875], "page": 29, "span": [0, 110], "__ref_s3_data": null}], "text": "GLYPH The solution is using a DL framework like TensorFlow for high-performing, low latency scoring.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [64.5902099609375, 554.939453125, 482.53704833984375, 571.1951293945312], "page": 29, "span": [0, 55], "__ref_s3_data": null}], "text": "Use case 4: Remaining Useful Life of an aircraft engine", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [136.06735229492188, 516.2785034179688, 545.7247314453125, 538.8272094726562], "page": 29, "span": [0, 108], "__ref_s3_data": null}], "text": "In this use case, we describe how an airline can deploy an AI model for inferencing by using IBMfi zSystems.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.07571411132812, 458.041015625, 547.2705688476562, 504.0214538574219], "page": 29, "span": [0, 339], "__ref_s3_data": null}], "text": "Remaining Useful Life (RUL) is the remaining time or cycles that an aircraft engine is likely to operate without any failure. 
In this case, it is the equivalent of the number of flights remaining for the engine after the last flight. By estimating RUL, the operator can decide on the next maintenance schedule and avoid unplanned downtime.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.3302459716797, 424.0997314453125, 525.1622924804688, 446.7457275390625], "page": 29, "span": [0, 114], "__ref_s3_data": null}], "text": "Figure 22 provides an overview of the inferencing architecture for the RUL of an aircraft engine when using IBM Z.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.4977035522461, 161.0757598876953, 244.7244415283203, 170.71823120117188], "page": 29, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 22 Inferencing architecture on IBM Z", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/24"}, {"prov": [{"bbox": [136.01132202148438, 66.6394271850586, 547.2557373046875, 148.60031127929688], "page": 29, "span": [0, 557], "__ref_s3_data": null}], "text": "Because we are looking into data-driven model development, the data set of our target is the run-to-failure data of the engine. We are looking into a supervised learning problem, and we use regression techniques to learn from the data. DL techniques such as Long Short-Term Memory (LSTM) or Gated Recurrent Units (GRU) are our choice because we are looking into a time series data set. TensorFlow or PyTorch frameworks are leveraged to create models. 
AI governance monitors the data and model drift to maintain the model quality throughout the model's life.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [535.3837280273438, 27.93828010559082, 547.2591552734375, 37.68974685668945], "page": 29, "span": [0, 2], "__ref_s3_data": null}], "text": "27", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [63.9516716003418, 27.93828010559082, 78.4020004272461, 37.51277542114258], "page": 30, "span": [0, 2], "__ref_s3_data": null}], "text": "28", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.685588836669922, 267.07440185546875, 37.18029022216797], "page": 30, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.9266815185547, 614.9383544921875, 547.2824096679688, 721.2327270507812], "page": 30, "span": [0, 754], "__ref_s3_data": null}], "text": "Open-source data from NASA was used to build the AI model, which then was deployed on CP4D. CP4D enables the data-scientist's journey from modeling to deployment in a seamless process. Data engineers leverage Db2 to host the data set, which includes the training, testing, and validation of a data set. Since data is hosted on Db2, you can expect low latency while retrieving the data and serve data security needs because Db2 is hosted on the IBM Z platform. Data is fetched by the data refinery to do the necessary pre-processing and data imputations. You can use the programming languages Golang or C++ for real-time predictions, depending on customer needs. 
For more information about this topic, see \"Use case 3: Clearing and settlement\" on page 25.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1148681640625, 557.2208862304688, 545.1770629882812, 603.3070678710938], "page": 30, "span": [0, 289], "__ref_s3_data": null}], "text": "Model building is done on Watson Studio, leveraging the high-performance computing hardware on IBM Z. You can train the model anywhere (on your own hardware or the cloud) and bring the model directly into CP4D, which provides data scientists with the flexibility of implementation choices.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.87142944335938, 510.6993713378906, 547.212890625, 545.31201171875], "page": 30, "span": [0, 249], "__ref_s3_data": null}], "text": "We used LSTM to build the AI model and used the training data. The model was continuously evaluated to model convergence. The final model is tested with the test data, which is never exposed at the time of training to make sure that the model works.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.99119567871094, 453.0931396484375, 547.3082885742188, 499.47491455078125], "page": 30, "span": [0, 304], "__ref_s3_data": null}], "text": "This model is deployed on WML on CP4D and runs on IBM Z. If required, the trained model can be converted to the Open Neural Network Exchange (ONNX) format before deployment. Based on project requirements, IBM Z supports high-throughput, low latency inference requirements by leveraging an AI accelerator.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.13357543945312, 371.263671875, 547.282470703125, 441.48968505859375], "page": 30, "span": [0, 495], "__ref_s3_data": null}], "text": "For decision-making about an aircraft engine's life, it is important to be able to explain the model predictions from end to end. This explainability may be global or local. 
Global explainability enables decision-makers to evaluate the trained model in general from the subject matter expert (SME) point of view. Local explainability enables the operator to validate the reasons behind the present inference and relate it to the past data points, which are an indicative cause of the prediction.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.87094116210938, 313.0604553222656, 547.32421875, 359.3154296875], "page": 30, "span": [0, 273], "__ref_s3_data": null}], "text": "The AI governance components such as IBM OpenScale on CP4D support explainability and manages the drifts in data and concept. OpenPages and AI FactSheet together can alert the stakeholders about important events through a dashboard and allow course correction at any point.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9972686767578, 255.09030151367188, 547.2745361328125, 301.46954345703125], "page": 30, "span": [0, 318], "__ref_s3_data": null}], "text": "Client-side applications can invoke a REST apiserver that handles some preprocessing of an incoming request before initiating the inference pipeline. 
Efficiencies might be needed in real-time applications, and inference response time can be reduced by adopting low-level programming while components are communicating.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.4759063720703, 220.89736938476562, 521.204345703125, 243.2855987548828], "page": 30, "span": [0, 121], "__ref_s3_data": null}], "text": "Figure 23 on page 29 provides a more in-depth view of the architecture of an AI-based predictive maintenance application.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.46824645996094, 340.3482971191406, 216.07789611816406, 349.8203430175781], "page": 31, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 23 In-depth architectural view", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/25"}, {"prov": [{"bbox": [136.14920043945312, 305.67156982421875, 501.76422119140625, 327.83966064453125], "page": 31, "span": [0, 106], "__ref_s3_data": null}], "text": "In summary, consider the following points while developing an AI-based predictive maintenance application:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.6149139404297, 264.79376220703125, 547.3203125, 299.5812683105469], "page": 31, "span": [0, 204], "__ref_s3_data": null}], "text": "GLYPH CP4D offers a Python run time to build a custom solution stack, but also supports different components like Watson Studio, WML, Db2, Data Refinery, OpenScale, AI Factsheets, and OpenPages.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.60621643066406, 247.86065673828125, 491.34027099609375, 257.88116455078125], "page": 31, "span": [0, 96], "__ref_s3_data": null}], "text": "GLYPH The trustworthiness of the predicted output is important for critical use cases.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.48333740234375, 219.0656280517578, 
534.4998779296875, 241.49769592285156], "page": 31, "span": [0, 118], "__ref_s3_data": null}], "text": "GLYPH IBM Z provides high data security and low latency requirements at scale for the critical applications.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4168243408203, 190.18487548828125, 547.2156982421875, 212.0349884033203], "page": 31, "span": [0, 141], "__ref_s3_data": null}], "text": "GLYPH A data scientist can choose to train the model and deploy it on CP4D seamlessly with the latest tech stack that is available.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.57362365722656, 160.84478759765625, 504.0291748046875, 183.60284423828125], "page": 31, "span": [0, 128], "__ref_s3_data": null}], "text": "GLYPH The AIOps and MLOps supported by CP4D to track AI model and data lifecycle throughout the application lifecycle.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [535.4987182617188, 27.93828010559082, 547.2591552734375, 37.63166046142578], "page": 31, "span": [0, 2], "__ref_s3_data": null}], "text": "29", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.60496520996094, 687.0557861328125, 542.2593994140625, 721.791015625], "page": 32, "span": [0, 83], "__ref_s3_data": null}], "text": "Use case 5: AI-powered video analytics on an infant's motions for health prediction", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.98846435546875, 612.279052734375, 546.6989135742188, 670.7722778320312], "page": 32, "span": [0, 395], "__ref_s3_data": null}], "text": "Each year, approximately 5 million newborns worldwide are suffering from a neuro-developmental disorder. Due to the lack of early diagnoses and intervention, many infants are disabled and abandoned, especially in countries with limited numbers of pediatricians with extensive experience in neuro-developmental disorders. 
This situation is a conundrum that plagues many families around the world.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0279998779297, 553.5992431640625, 547.3121948242188, 600.06591796875], "page": 32, "span": [0, 280], "__ref_s3_data": null}], "text": "Infant motion analysis plays critical importance to understanding and comprehending healthy childhood development. In infants, monitoring their poses provides information about their health that can lead to a better prediction of early developmental risk assessment and diagnosis.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.17251586914062, 508.10137939453125, 533.3443603515625, 542.4315185546875], "page": 32, "span": [0, 264], "__ref_s3_data": null}], "text": "Adults use different techniques and methods to express their feelings (like sick, happy, stressed, or hungry), but this case is usually different for infants who cannot express their feelings. Based on the baby movements, AI can predict their expression or health.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.91824340820312, 426.2819519042969, 535.1302490234375, 496.59674072265625], "page": 32, "span": [0, 442], "__ref_s3_data": null}], "text": "In this use case, we examine how AI-powered video analytics can assist new parents and hospitals by addressing pose-based real-time body movements of the infants (such as arching back, head banging, kicking legs, rubbing eyes, stretching, and sucking fingers). 
During the initial months of a baby's life, spontaneous movements might indicate later developmental disorders, such as cerebral palsy, Rett syndrome, and autism spectrum disorders.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.6669921875, 393.6462707519531, 186.71859741210938, 406.78857421875], "page": 32, "span": [0, 19], "__ref_s3_data": null}], "text": "Industry challenges", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.72630310058594, 334.298828125, 547.2576293945312, 380.78509521484375], "page": 32, "span": [0, 323], "__ref_s3_data": null}], "text": "There are video surveillance systems that are installed for monitoring an infant's movement in many hospitals or homes so that any problem can be witnessed and potentially even stopped before they take place. These systems require much manual work to monitor the real-stream videos and intervene when a problem is detected.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.09779357910156, 264.0821228027344, 547.2775268554688, 322.5864562988281], "page": 32, "span": [0, 391], "__ref_s3_data": null}], "text": "There is a certain amount of trust that you must place on the person who monitors a surveillance system to ensure that the job is being done effectively and efficiently, and that the surveillance system is being vigilantly watched. 
Because of the dependency on these manual efforts, you need something \"smart\" that monitors constantly the surveillance system and detect problems effectively.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.74172973632812, 217.7925262451172, 547.2385864257812, 252.07472229003906], "page": 32, "span": [0, 248], "__ref_s3_data": null}], "text": "AI is shaping the controls of surveillance that can map and track occurrences with self-learning abilities, AI can improve on human operations and analyze video footage in real time to alert the hospitals or parents if any anomalies are identified.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9100341796875, 183.46926879882812, 541.7665405273438, 206.01150512695312], "page": 32, "span": [0, 173], "__ref_s3_data": null}], "text": "Video processing a stream of data from surveillance systems and then performing advance analytics and detecting anomalies quickly is a significant challenge in the industry.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.55262756347656, 150.96932983398438, 278.4443054199219, 164.3548583984375], "page": 32, "span": [0, 36], "__ref_s3_data": null}], "text": "Infant motion analytics in real time", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.83998107910156, 67.68016052246094, 547.2257080078125, 137.958740234375], "page": 32, "span": [0, 519], "__ref_s3_data": null}], "text": "AI is the current \"market trend evolution\" in video analytics and advancing the decision-making capabilities of the human mind. DL-based computer vision AI techniques are being widely adopted by various industries to solve real-time problems. These techniques improve the detection and prediction accuracy without increasing the hardware cost exponentially. 
For users, AI greatly reduces the workload of the monitoring staff and provides benefits by detecting unusual incidents and solving many video forensic problems.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.2401351928711, 27.93828010559082, 78.4020004272461, 37.58808135986328], "page": 32, "span": [0, 2], "__ref_s3_data": null}], "text": "30", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.673728942871094, 267.07440185546875, 37.15589141845703], "page": 32, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.79975891113281, 614.259521484375, 71.44307708740234, 623.4725341796875], "page": 33, "span": [0, 1], "__ref_s3_data": null}], "text": "S", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.60729217529297, 368.9632873535156, 281.4980773925781, 378.5414733886719], "page": 33, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 24 Architecture for AI-powered video analytics", "type": "caption", "name": "Caption", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/26"}, {"prov": [{"bbox": [135.84861755371094, 274.2659912109375, 547.311279296875, 356.6263732910156], "page": 33, "span": [0, 575], "__ref_s3_data": null}], "text": "Live camera feeds or recorded videos of an infant's movement are the inputs for a pose detection model. This video streaming data was stored in IBM Cloudfi Object Storage for image processing. Video data must be transformed into frames so that the infant's body poses can be detected. These post-estimation components of the pipeline predict the location of all 17-person key points with 3 degrees of freedom each (x, y location and visibility) plus two virtual alignment key points. 
This approach also embraces a compute-intensive heat map prediction of infant body posture.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89413452148438, 192.76055908203125, 539.9171752929688, 262.6158447265625], "page": 33, "span": [0, 454], "__ref_s3_data": null}], "text": "When changes in body posture or movement happen, analytics can be performed, and a threshold can be set for the angle of the body and posture movements. An analysis can be performed on movement that is based on that threshold to help to predict an infant's health index in the output video stream by leveraging the IBM z16 on-chip AI acceleration, which provides an execution speed in real time on an edge device, which cannot be achieved by other means.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.61978149414062, 170.2261962890625, 437.95953369140625, 180.65838623046875], "page": 33, "span": [0, 68], "__ref_s3_data": null}], "text": "We can leverage the following AI technology stack for this use case:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.57650756835938, 141.35748291015625, 546.6869506835938, 163.9613494873047], "page": 33, "span": [0, 117], "__ref_s3_data": null}], "text": "GLYPH Convolutional neural network: Build an artificial neural network model on video streaming and images.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.63107299804688, 124.72181701660156, 455.69329833984375, 134.6374969482422], "page": 33, "span": [0, 80], "__ref_s3_data": null}], "text": "GLYPH TensorFlow: A DL back-end framework that is based on TensorFlow.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.57284545898438, 95.74219512939453, 543.4529418945312, 117.91780090332031], "page": 33, "span": [0, 120], "__ref_s3_data": null}], "text": "GLYPH Mediapipe: A library that helps with video streaming processing and prediction of human pose 
estimation.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.501220703125, 78.27716064453125, 516.3308715820312, 88.64454650878906], "page": 33, "span": [0, 96], "__ref_s3_data": null}], "text": "GLYPH OpenCV: A real-time computer vision library that helps perform image processing.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [535.5914916992188, 27.93828010559082, 547.2591552734375, 37.50794219970703], "page": 33, "span": [0, 2], "__ref_s3_data": null}], "text": "31", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.94410705566406, 686.981201171875, 540.1576538085938, 721.435791015625], "page": 33, "span": [0, 258], "__ref_s3_data": null}], "text": "CP4D was used to build and deploy the AI-powered video analytics on infant's motion for health prediction use case on IBM Z. IBM Z with AI accelerator enables faster inference for detecting face and body movements and performing angle analytics in real time.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0972137451172, 629.25927734375, 542.9444580078125, 675.5055541992188], "page": 33, "span": [0, 288], "__ref_s3_data": null}], "text": "Figure 24 shows an architectural diagram about how to design and develop an AI model for real-time body pose detection on IBM Z. 
A deep convolutional neural network architecture was trained on the task of infant pose estimation on the custom data set by leveraging IBM Cloud Pak for Data.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.89791870117188, 687.1724243164062, 542.3601684570312, 721.3072509765625], "page": 34, "span": [0, 238], "__ref_s3_data": null}], "text": "WML was used for deployment of the pose detection model and generated notifications to users with web and mobile applications, and it integrates with Fitbit for push notifications so that hospitals and parents can take preventive actions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.27169799804688, 644.0363159179688, 223.8605499267578, 659.7733154296875], "page": 34, "span": [0, 20], "__ref_s3_data": null}], "text": "Additional resources", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.46092224121094, 605.2584838867188, 547.2325439453125, 627.453857421875], "page": 34, "span": [0, 149], "__ref_s3_data": null}], "text": "GLYPH The Cloud Pak for Data 4.5 on IBM Z Overview Demo video provides an overview of some of the more important features of CP4D on IBM Z.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.48532104492188, 588.2786865234375, 300.0693054199219, 598.7608642578125], "page": 34, "span": [0, 49], "__ref_s3_data": null}], "text": "GLYPH IBM Cloud Pak for Data Tutorials.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.57339477539062, 559.299072265625, 518.558837890625, 581.29833984375], "page": 34, "span": [0, 149], "__ref_s3_data": null}], "text": "GLYPH Here are some additional use cases that use the data science frameworks that are available as part of CP4D on IBM Z and IBM LinuxONE:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.18695068359375, 529.6796875, 527.840576171875, 552.08740234375], "page": 34, 
"span": [0, 127], "__ref_s3_data": null}], "text": "-Payment Card Fraud Detection by using TensorFlow on CP4D on IBM Z and IBM LinuxONE is a payment card fraud detection use case.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.0952606201172, 501.1863708496094, 539.9617919921875, 523.5965576171875], "page": 34, "span": [0, 152], "__ref_s3_data": null}], "text": "-Fashion-MNIST clothing classification with PyTorch on Cloud Pak for Data on IBM Z and IBM LinuxONE is a Fashion-MNIST clothing classification use case.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [151.3111572265625, 448.2712097167969, 547.2676391601562, 494.79150390625], "page": 34, "span": [0, 319], "__ref_s3_data": null}], "text": "-Payment Card Fraud Prevention by using Snap ML on IBM Cloud Pak for Data on Red Hat OpenShift on a virtual machine on IBM Z and IBM LinuxONE, which leverage the z16 integrated AI accelerator describes a use case that uses Snap Machine Learning in Cloud Pak for Data on IBM Z and IBM LinuxONE. 
It is a Snap ML use case.", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [164.91055297851562, 419.2614440917969, 547.1928100585938, 441.53619384765625], "page": 34, "span": [0, 131], "__ref_s3_data": null}], "text": "A companion video can be found at Credit Card Fraud Detection by using Snap ML on IBM Cloud Pak for Data on IBM Z and IBM LinuxONE.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.38446807861328, 375.9455871582031, 137.7028350830078, 391.6693420410156], "page": 34, "span": [0, 7], "__ref_s3_data": null}], "text": "Summary", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.69834899902344, 253.239990234375, 547.1648559570312, 359.5583801269531], "page": 34, "span": [0, 734], "__ref_s3_data": null}], "text": "This IBM Redbooksfi publication presented an overview of how IBM Cloud Pak for Data on IBM Z can modernize your data infrastructure; develop and deploy ML and AI models; and instantiate highly efficient analytics deployment on IBM LinuxONE. 
This publication demonstrated these tasks by guiding the reader through five common use cases where CP4D on IBM Z and IBM LinuxONE uses the different features that are supported on the platform, and showing how the associated features can help an enterprise to build AI and ML models with core transactional data, which results in a highly efficient analytics deployment that minimizes latency, cost inefficiencies, and potential security exposures that are connected with data transportation.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.984222412109375, 220.6373748779297, 114.55732727050781, 233.51852416992188], "page": 34, "span": [0, 7], "__ref_s3_data": null}], "text": "Authors", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.27400970458984, 27.93828010559082, 78.4020004272461, 37.550392150878906], "page": 34, "span": [0, 2], "__ref_s3_data": null}], "text": "32", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.730152130126953, 267.07440185546875, 37.189125061035156], "page": 34, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [135.93601989746094, 185.25857543945312, 538.573486328125, 207.30662536621094], "page": 34, "span": [0, 112], "__ref_s3_data": null}], "text": "This publication was produced by a team of specialists from around the world working with the IBM Redbooks team:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.83291625976562, 115.17806243896484, 547.2435913085938, 173.28330993652344], "page": 34, "span": [0, 393], "__ref_s3_data": null}], "text": "Jasmeet Bhatia is an AI on IBM Z Product Manager who supports CP4D on IBM Z. She has 2.5 years of combined experience as a data scientist and a product manager. 
Jasmeet lives in San Francisco, California and holds a Bachelor of Arts degree in Data Science. She is working on her Master of Science degree in Data Science. Her area of expertise includes AI, data science, and product management.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.073974609375, 651.14697265625, 546.0402221679688, 721.3735961914062], "page": 35, "span": [0, 505], "__ref_s3_data": null}], "text": "Ravi Gummadi is a Technical Leader for CP4D on Linux on IBM Z and IBM LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software for various platforms, including IBM Z and IBM LinuxONE. He holds a master's degree in computer science and engineering from the Indian Institute of Technology Madras (IIT Madras). His areas of expertise include compilers, virtualization, big data analytics, containers, data, and AI, with a special focus on open-source ecosystems.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.9738311767578, 568.9080200195312, 546.8887329101562, 639.3916015625], "page": 35, "span": [0, 501], "__ref_s3_data": null}], "text": "Chandra Shekhar Reddy Potula is a Lead AI on zSystems team Architect for Linux on IBM Z and LinuxONE in India. He has 18+ years of experience in the design and development of enterprise software and firmware for various platforms, including IBM Z and LinuxONE. He holds a degree in computer science of engineering from Jawaharlal Nehru Technological University (JNTU). 
His areas of expertise include networking, virtualization, containers, data, and AI, with a special focus on open-source ecosystems.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.81300354003906, 439.12628173828125, 547.256103515625, 557.2632446289062], "page": 35, "span": [0, 828], "__ref_s3_data": null}], "text": "Srirama Sharma is a Lead Technical Architect for IBM Cloud Pak, IBM Instanafi, IBM Turbonomicfi, and Red Hat Advanced Cluster Management for Kubernetes (RHACM) on IBM Z and LinuxONE. He has 18+ years of experience in UNIX and Linux application and device driver development. He designs ISV solutions on IBM Systems and IBM Blockchainfi. He also works on cloud-native adoption of enterprise solutions on IBM Z and LinuxONE. Srirama holds a Bachelor of Engineering degree in computer science from Visvesvaraya Technological University (VTU). He lives in Bangalore, Karnataka. His areas of expertise include UNIX and Linux systems programming, virtualization, performance benchmarking of Financial Services Sector (FSS) industry solutions, open-source ecosystems, server infrastructure, and cloud-native adoption and modernization.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.1691436767578, 417.03826904296875, 432.8396301269531, 427.28643798828125], "page": 35, "span": [0, 71], "__ref_s3_data": null}], "text": "Thanks to the following people for their contributions to this project:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.17153930664062, 382.91351318359375, 314.4146423339844, 405.3429870605469], "page": 35, "span": [0, 65], "__ref_s3_data": null}], "text": "Lydia Parziale, Project Manager IBM Redbooks, Poughkeepsie Center", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.13253784179688, 349.3034973144531, 364.24346923828125, 371.6414489746094], "page": 35, "span": [0, 54], "__ref_s3_data": null}], "text": "Shin Kelly Yang, AI on IBM Z 
Product Management IBM US", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.0904998779297, 315.28411865234375, 537.7623291015625, 337.33740234375], "page": 35, "span": [0, 87], "__ref_s3_data": null}], "text": "Tom Ramey, Anna Shugol, Andrew Sica, Jonathan Sloan, Elpida Tzortzatos, Meeta Vouk, IBM", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.80000305175781, 282.2770690917969, 349.12164306640625, 295.41973876953125], "page": 35, "span": [0, 43], "__ref_s3_data": null}], "text": "Now you can become a published author, too!", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.90232849121094, 187.2394561767578, 547.3480224609375, 269.3630065917969], "page": 35, "span": [0, 559], "__ref_s3_data": null}], "text": "Here's an opportunity to spotlight your skills, grow your career, and become a published author-all at the same time! Join an IBM Redbooks residency project and help write a book in your area of expertise, while honing your experience using leading-edge technologies. Your efforts will help to increase product acceptance and customer satisfaction, as you expand your network of technical contacts and relationships. 
Residencies run from two to six weeks in length, and you can participate either in person or as a remote resident working from your home base.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.51300048828125, 164.8619842529297, 547.3431396484375, 175.372802734375], "page": 35, "span": [0, 91], "__ref_s3_data": null}], "text": "Find out more about the residency program, browse the residency index, and apply online at:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [136.80099487304688, 148.4791259765625, 301.6788024902344, 157.64808654785156], "page": 35, "span": [0, 34], "__ref_s3_data": null}], "text": "ibm.com /redbooks/residencies.html", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [535.5746459960938, 27.93828010559082, 547.2591552734375, 37.47697448730469], "page": 35, "span": [0, 2], "__ref_s3_data": null}], "text": "33", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.46549987792969, 708.3641967773438, 270.4855651855469, 721.534423828125], "page": 36, "span": [0, 30], "__ref_s3_data": null}], "text": "Stay connected to IBM Redbooks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [135.5939178466797, 685.2982788085938, 241.2664794921875, 695.3732299804688], "page": 36, "span": [0, 36], "__ref_s3_data": null}], "text": "GLYPH Find us on LinkedIn:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.66708374023438, 667.9544677734375, 391.0767822265625, 677.9544677734375], "page": 36, "span": [0, 48], "__ref_s3_data": null}], "text": "http://www.linkedin.com/groups?home=&gid=2130806", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [135.4261932373047, 639.2014770507812, 546.8383178710938, 661.0797729492188], "page": 36, "span": [0, 118], "__ref_s3_data": null}], "text": "GLYPH Explore new Redbooks publications, residencies, and workshops with the IBM 
Redbooks weekly newsletter:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.72427368164062, 622.588134765625, 451.4742736816406, 632.6220703125], "page": 36, "span": [0, 60], "__ref_s3_data": null}], "text": "https://www.redbooks.ibm.com/Redbooks.nsf/subscribe?OpenForm", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [135.66847229003906, 605.2367553710938, 430.1478271484375, 615.342041015625], "page": 36, "span": [0, 76], "__ref_s3_data": null}], "text": "GLYPH Stay current on recent Redbooks publications with RSS Feeds:", "type": "paragraph", "name": "List-item", "font": null}, {"prov": [{"bbox": [150.44317626953125, 588.5687866210938, 331.0777282714844, 598.3783569335938], "page": 36, "span": [0, 36], "__ref_s3_data": null}], "text": "http://www.redbooks.ibm.com/rss.html", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.26653289794922, 27.93828010559082, 78.4020004272461, 37.59064865112305], "page": 36, "span": [0, 2], "__ref_s3_data": null}], "text": "34", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 27.662551879882812, 267.07440185546875, 37.169410705566406], "page": 36, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.80000305175781, 695.9519653320312, 151.5048065185547, 718.752197265625], "page": 37, "span": [0, 7], "__ref_s3_data": null}], "text": "Notices", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.00486755371094, 629.8663330078125, 547.2454833984375, 659.926025390625], "page": 37, "span": [0, 254], "__ref_s3_data": null}], "text": "This information was developed for products and services offered in the US. This material might be available from IBM in other languages. 
However, you may be required to own a copy of the product or product version in that language in order to access it.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.13760375976562, 559.7279663085938, 547.1796875, 619.8883056640625], "page": 37, "span": [0, 625], "__ref_s3_data": null}], "text": "IBM may not offer the products, services, or features discussed in this document in other countries. Consult your local IBM representative for information on the products and services currently available in your area. Any reference to an IBM product, program, or service is not intended to state or imply that only that IBM product, program, or service may be used. Any functionally equivalent product, program, or service that does not infringe any IBM intellectual property right may be used instead. However, it is the user's responsibility to evaluate and verify the operation of any non-IBM product, program, or service.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.0386962890625, 519.6210327148438, 547.3560180664062, 550.1978149414062], "page": 37, "span": [0, 232], "__ref_s3_data": null}], "text": "IBM may have patents or pending patent applications covering subject matter described in this document. The furnishing of this document does not grant you any license to these patents. 
You can send license inquiries, in writing, to:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.49063873291016, 509.9056701660156, 535.3104248046875, 520.2186889648438], "page": 37, "span": [0, 99], "__ref_s3_data": null}], "text": "IBM Director of Licensing, IBM Corporation, North Castle Drive, MD-NC119, Armonk, NY 10504-1785, US", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.12965393066406, 449.7742004394531, 545.7673950195312, 500.42437744140625], "page": 37, "span": [0, 411], "__ref_s3_data": null}], "text": "INTERNATIONAL BUSINESS MACHINES CORPORATION PROVIDES THIS PUBLICATION \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Some jurisdictions do not allow disclaimer of express or implied warranties in certain transactions, therefore, this statement may not apply to you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.0031509399414, 400.1820983886719, 547.2484130859375, 439.9192810058594], "page": 37, "span": [0, 345], "__ref_s3_data": null}], "text": "This information could include technical inaccuracies or typographical errors. Changes are periodically made to the information herein; these changes will be incorporated in new editions of the publication. IBM may make improvements and/or changes in the product(s) and/or the program(s) described in this publication at any time without notice.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.96153259277344, 359.86328125, 541.5413208007812, 389.9772033691406], "page": 37, "span": [0, 286], "__ref_s3_data": null}], "text": "Any references in this information to non-IBM websites are provided for convenience only and do not in any manner serve as an endorsement of those websites. 
The materials at those websites are not part of the materials for this IBM product and use of those websites is at your own risk.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.42230224609375, 329.4605407714844, 519.2667846679688, 350.0104675292969], "page": 37, "span": [0, 136], "__ref_s3_data": null}], "text": "IBM may use or distribute any of the information you provide in any way it believes appropriate without incurring any obligation to you.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.80852127075195, 299.43133544921875, 505.2710266113281, 319.78839111328125], "page": 37, "span": [0, 191], "__ref_s3_data": null}], "text": "The performance data and client examples cited are presented for illustrative purposes only. Actual performance results may vary depending on specific configurations and operating conditions.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.22393798828125, 249.75856018066406, 547.2424926757812, 289.9355163574219], "page": 37, "span": [0, 408], "__ref_s3_data": null}], "text": "Information concerning non-IBM products was obtained from the suppliers of those products, their published announcements or other publicly available sources. IBM has not tested those products and cannot confirm the accuracy of performance, compatibility or any other claims related to non-IBM products. 
Questions on the capabilities of non-IBM products should be addressed to the suppliers of those products.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.30084991455078, 219.74908447265625, 544.685791015625, 239.94190979003906], "page": 37, "span": [0, 146], "__ref_s3_data": null}], "text": "Statements regarding IBM's future direction or intent are subject to change or withdrawal without notice, and represent goals and objectives only.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.958980560302734, 170.14593505859375, 547.016357421875, 209.852294921875], "page": 37, "span": [0, 335], "__ref_s3_data": null}], "text": "This information contains examples of data and reports used in daily business operations. To illustrate them as completely as possible, the examples include the names of individuals, companies, brands, and products. All of these names are fictitious and any similarity to actual people or business enterprises is entirely coincidental.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.37278747558594, 150.16616821289062, 172.50048828125, 160.1703643798828], "page": 37, "span": [0, 18], "__ref_s3_data": null}], "text": "COPYRIGHT LICENSE:", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [63.9893798828125, 59.65068817138672, 547.3580932617188, 140.0205841064453], "page": 37, "span": [0, 779], "__ref_s3_data": null}], "text": "This information contains sample application programs in source language, which illustrate programming techniques on various operating platforms. You may copy, modify, and distribute these sample programs in any form without payment to IBM, for the purposes of developing, using, marketing or distributing application programs conforming to the application programming interface for the operating platform for which the sample programs are written. 
These examples have not been thoroughly tested under all conditions. IBM, therefore, cannot guarantee or imply reliability, serviceability, or function of these programs. The sample programs are provided \"AS IS\", without warranty of any kind. IBM shall not be liable for any damages arising out of your use of the sample programs.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.854331970214844, 27.89427947998047, 180.32760620117188, 37.42281723022461], "page": 37, "span": [0, 27], "__ref_s3_data": null}], "text": "' Copyright IBM Corp. 2023.", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [535.5529174804688, 27.93828010559082, 547.2591552734375, 37.60062026977539], "page": 37, "span": [0, 2], "__ref_s3_data": null}], "text": "35", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [64.16413116455078, 706.0162963867188, 155.489501953125, 721.5309448242188], "page": 38, "span": [0, 10], "__ref_s3_data": null}], "text": "Trademarks", "type": "subtitle-level-1", "name": "Section-header", "font": null}, {"prov": [{"bbox": [64.14505004882812, 649.2987060546875, 547.2343139648438, 689.3553466796875], "page": 38, "span": [0, 383], "__ref_s3_data": null}], "text": "IBM, the IBM logo, and ibm.com are trademarks or registered trademarks of International Business Machines Corporation, registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. 
A current list of IBM trademarks is available on the web at \"Copyright and trademark information\" at http://www.ibm.com/legal/copytrade.shtml", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.75762176513672, 619.232177734375, 547.241455078125, 639.3633422851562], "page": 38, "span": [0, 181], "__ref_s3_data": null}], "text": "The following terms are trademarks or registered trademarks of International Business Machines Corporation, and might also be trademarks or registered trademarks in other countries.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Table", "type": "table", "$ref": "#/tables/0"}, {"prov": [{"bbox": [64.11603546142578, 523.834716796875, 312.0052490234375, 533.9044189453125], "page": 38, "span": [0, 54], "__ref_s3_data": null}], "text": "The following terms are trademarks of other companies:", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.29424285888672, 493.69439697265625, 528.6849365234375, 514.6287841796875], "page": 38, "span": [0, 184], "__ref_s3_data": null}], "text": "Intel, Intel logo, Intel Inside logo, and Intel Centrino logo are trademarks or registered trademarks of Intel Corporation or its subsidiaries in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [63.84184265136719, 464.0928955078125, 541.6887817382812, 484.438720703125], "page": 38, "span": [0, 174], "__ref_s3_data": null}], "text": "The registered trademark Linuxfi is used pursuant to a sublicense from the Linux Foundation, the exclusive licensee of Linus Torvalds, owner of the mark on a worldwide basis.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.25360870361328, 434.2598876953125, 531.9806518554688, 454.5678405761719], "page": 38, "span": [0, 140], "__ref_s3_data": null}], "text": "Red Hat and OpenShift are trademarks or registered trademarks of Red Hat, Inc. 
or its subsidiaries in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.4344482421875, 414.2238464355469, 472.0943908691406, 424.56378173828125], "page": 38, "span": [0, 90], "__ref_s3_data": null}], "text": "UNIX is a registered trademark of The Open Group in the United States and other countries.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.17461395263672, 394.30035400390625, 465.3721618652344, 404.4769287109375], "page": 38, "span": [0, 86], "__ref_s3_data": null}], "text": "Other company, product, or service names may be trademarks or service marks of others.", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [64.34241485595703, 26.91827964782715, 78.4020004272461, 36.332603454589844], "page": 38, "span": [0, 2], "__ref_s3_data": null}], "text": "36", "type": "page-footer", "name": "Page-footer", "font": null}, {"prov": [{"bbox": [93.42030334472656, 26.466215133666992, 267.07440185546875, 36.090301513671875], "page": 38, "span": [0, 38], "__ref_s3_data": null}], "text": "IBM Cloud Pak for Data on IBM zSystems", "type": "page-footer", "name": "Page-footer", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/27"}, {"prov": [{"bbox": [287.2200012207031, 741.251953125, 415.20721435546875, 763.4519653320312], "page": 40, "span": [0, 10], "__ref_s3_data": null}], "text": "Back cover", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [496.1397399902344, 670.4779663085938, 564.1908569335938, 680.523193359375], "page": 40, "span": [0, 12], "__ref_s3_data": null}], "text": "REDP-5695-00", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [482.7099609375, 649.478271484375, 564.5999145507812, 659.0663452148438], "page": 40, "span": [0, 15], "__ref_s3_data": null}], "text": "ISBN 0738461067", "type": "paragraph", "name": "Text", "font": null}, {"prov": [{"bbox": [497.399169921875, 
89.81710052490234, 564.1929321289062, 99.43573760986328], "page": 40, "span": [0, 17], "__ref_s3_data": null}], "text": "Printed in U.S.A.", "type": "paragraph", "name": "Text", "font": null}, {"name": "Picture", "type": "figure", "$ref": "#/figures/28"}], "figures": [{"prov": [{"bbox": [409.23211669921875, 713.666259765625, 568.517822265625, 757.0501708984375], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [0.12108886241912842, 90.19225311279297, 610.4767456054688, 504.2667541503906], "page": 1, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [33.33281326293945, 552.1343383789062, 238.89004516601562, 721.9103393554688], "page": 3, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [135.98939514160156, 345.3135986328125, 436.0746154785156, 714.1400756835938], "page": 5, "span": [0, 16], "__ref_s3_data": null}], "text": "Figure 1 IBM z16", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.93318176269531, 415.7976989746094, 547.8917236328125, 685.1072998046875], "page": 6, "span": [0, 33], "__ref_s3_data": null}], "text": "Figure 2 IBM Z: Processor roadmap", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [434.496826171875, 442.9707946777344, 543.5748901367188, 660.1309204101562], "page": 7, "span": [0, 52], "__ref_s3_data": null}], "text": "Figure 3 System design of IBM z16 LinuxONE Emperor 4", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [65.18315887451172, 430.1353454589844, 547.7926635742188, 684.5646362304688], "page": 8, "span": [0, 78], "__ref_s3_data": null}], "text": "Figure 4 IBM z16 on-chip AI Accelerator integration with IBM Z processor cores", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.25979614257812, 492.203369140625, 548.035888671875, 714.2471923828125], "page": 9, "span": [0, 29], 
"__ref_s3_data": null}], "text": "Figure 5 Seamless integration", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.54902648925781, 310.984375, 547.7777709960938, 612.0947265625], "page": 10, "span": [0, 48], "__ref_s3_data": null}], "text": "Figure 6 Solution overview of Cloud Pak for Data", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.61245346069336, 441.5110168457031, 547.817138671875, 714.1513061523438], "page": 13, "span": [0, 104], "__ref_s3_data": null}], "text": "Figure 7 Developing, training, and deploying an AI model on Cloud Pak for Data on IBM Z and IBM LinuxONE", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.72257995605469, 462.15850830078125, 547.6842041015625, 660.9888916015625], "page": 16, "span": [0, 35], "__ref_s3_data": null}], "text": "Figure 8 Typical AI model lifecycle", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.82352828979492, 501.3348693847656, 548.0906982421875, 714.08984375], "page": 18, "span": [0, 54], "__ref_s3_data": null}], "text": "Figure 9 Remote AI governance solution end-to-end flow", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.964263916015625, 125.87242126464844, 547.62890625, 423.3223571777344], "page": 18, "span": [0, 49], "__ref_s3_data": null}], "text": "Figure 10 Creating a model entry in IBM OpenPages", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.76428985595703, 378.5587158203125, 547.38916015625, 672.571533203125], "page": 19, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 11 Training an AI model by using Watson Studio", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.11590576171875, 67.91944122314453, 547.7499389648438, 318.0794982910156], "page": 19, "span": [0, 66], "__ref_s3_data": null}], "text": "Figure 12 Deploying an AI model by using WML on Cloud Pak for Data", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [136.353515625, 406.53900146484375, 533.409912109375, 
661.0449829101562], "page": 20, "span": [0, 24], "__ref_s3_data": null}], "text": "Figure 13 External model", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.07536315917969, 83.89727020263672, 547.8724975585938, 346.0033874511719], "page": 20, "span": [0, 28], "__ref_s3_data": null}], "text": "Figure 14 Tracking the model", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.772430419921875, 380.2639465332031, 547.672119140625, 684.7722778320312], "page": 21, "span": [0, 91], "__ref_s3_data": null}], "text": "Figure 15 Model facts that are tracked and synchronized to IBM OpenPages on an x86 platform", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.67052459716797, 410.38824462890625, 547.7557983398438, 684.6401977539062], "page": 22, "span": [0, 55], "__ref_s3_data": null}], "text": "Figure 16 Creating an external model on an x86 platform", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.84904861450195, 440.8315734863281, 547.9327392578125, 713.8433837890625], "page": 23, "span": [0, 76], "__ref_s3_data": null}], "text": "Figure 17 IBM OpenScale dashboard that is used to monitor the external model", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.19013214111328, 69.50128936767578, 547.7117919921875, 344.5738220214844], "page": 23, "span": [0, 103], "__ref_s3_data": null}], "text": "Figure 18 Final result: End-to-end AI governance when using IBM OpenPages, AI Factsheets, and OpenScale", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.70671081542969, 451.1697692871094, 545.1655883789062, 713.8380737304688], "page": 25, "span": [0, 82], "__ref_s3_data": null}], "text": "Figure 19 Architecture for credit risk prediction by using an ML AI model on IBM Z", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.45086669921875, 433.2137756347656, 547.7979736328125, 696.4598388671875], "page": 26, "span": [0, 70], "__ref_s3_data": null}], "text": "Figure 20 Architecture for 
credit risk prediction by using DL on IBM Z", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.91328811645508, 479.8487548828125, 542.5390625, 684.5452270507812], "page": 28, "span": [0, 97], "__ref_s3_data": null}], "text": "Figure 21 Clearing and settlement use case for financial transactions by using Cloud Pak for Data", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.87143325805664, 173.84889221191406, 539.6433715820312, 410.2047119140625], "page": 29, "span": [0, 43], "__ref_s3_data": null}], "text": "Figure 22 Inferencing architecture on IBM Z", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [63.89833068847656, 352.471923828125, 547.9695434570312, 714.0540771484375], "page": 31, "span": [0, 37], "__ref_s3_data": null}], "text": "Figure 23 In-depth architectural view", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [64.11891174316406, 381.7127380371094, 541.8325805664062, 614.5540161132812], "page": 33, "span": [0, 53], "__ref_s3_data": null}], "text": "Figure 24 Architecture for AI-powered video analytics", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [42.87873458862305, 15.771553993225098, 69.14901733398438, 43.16151428222656], "page": 40, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}, {"prov": [{"bbox": [433.9858703613281, 19.161434173583984, 572.2542114257812, 54.680973052978516], "page": 40, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "figure", "bounding-box": null}], "tables": [{"prov": [{"bbox": [75.00436401367188, 546.11572265625, 487.3641357421875, 611.8494262695312], "page": 38, "span": [0, 0], "__ref_s3_data": null}], "text": "", "type": "table", "#-cols": 3, "#-rows": 6, "data": [[{"bbox": [75.5999984741211, 601.1370239257812, 98.19540405273438, 609.4619750976562], "spans": [[0, 0]], "text": "Db2fi IBMfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": 
[0, 1]}, {"bbox": [236.40029907226562, 601.13623046875, 292.0049743652344, 609.461181640625], "spans": [[0, 1]], "text": "IBM Watsonfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1]}, {"bbox": [397.2005920410156, 601.1354370117188, 455.17950439453125, 609.4603881835938], "spans": [[0, 2]], "text": "Redbooks (log o) fi Turbon", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1]}], [{"bbox": null, "spans": [[1, 0]], "text": "", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [236.40029907226562, 590.15625, 278.3294982910156, 598.481201171875], "spans": [[1, 1]], "text": "IBM z16\u2122", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2]}, {"bbox": [425.6900939941406, 590.1554565429688, 451.2635803222656, 598.4804077148438], "spans": [[1, 2]], "text": "omicfi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2]}], [{"bbox": [75.5999984741211, 579.1167602539062, 144.1304931640625, 587.4417114257812], "spans": [[2, 0]], "text": "IBM Blockchainfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [236.40029907226562, 579.115966796875, 272.4696044921875, 587.44091796875], "spans": [[2, 1]], "text": "Instanafi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3]}, {"bbox": [397.2005920410156, 579.1151733398438, 451.2527770996094, 587.4401245117188], "spans": [[2, 2]], "text": "WebSpherefi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3]}], [{"bbox": [75.5999984741211, 568.1367797851562, 112.57559204101562, 
576.4617309570312], "spans": [[3, 0]], "text": "IBM Cloudfi IBM Clou", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [236.40029907226562, 568.135986328125, 294.3809509277344, 576.4609375], "spans": [[3, 1]], "text": "Open Libertyfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4]}, {"bbox": [397.2005920410156, 568.1351928710938, 423.8045959472656, 576.4601440429688], "spans": [[3, 2]], "text": "z/OSfi", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4]}], [{"bbox": [112.60798645019531, 557.1567993164062, 142.21170043945312, 565.4817504882812], "spans": [[4, 0]], "text": "d Pakfi", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [236.40029907226562, 557.156005859375, 290.44708251953125, 565.48095703125], "spans": [[4, 1]], "text": "OpenPagesfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5]}, {"bbox": [397.2005920410156, 557.1552124023438, 420.6365966796875, 565.4801635742188], "spans": [[4, 2]], "text": "z16\u2122", "type": "body", "col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5]}], [{"bbox": [75.5999984741211, 546.1165161132812, 128.0511016845703, 554.4414672851562], "spans": [[5, 0]], "text": "IBM Telum\u2122", "type": "body", "col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": [236.40029907226562, 546.11572265625, 283.47210693359375, 554.440673828125], "spans": [[5, 1]], "text": "Redbooksfi", "type": "body", "col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6]}, {"bbox": null, "spans": [[5, 2]], "text": "", "type": "body", "col": 2, 
"col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6]}]], "model": null, "bounding-box": null}], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}, {"height": 792.0, "page": 10, "width": 612.0}, {"height": 792.0, "page": 11, "width": 612.0}, {"height": 792.0, "page": 12, "width": 612.0}, {"height": 792.0, "page": 13, "width": 612.0}, {"height": 792.0, "page": 14, "width": 612.0}, {"height": 792.0, "page": 15, "width": 612.0}, {"height": 792.0, "page": 16, "width": 612.0}, {"height": 792.0, "page": 17, "width": 612.0}, {"height": 792.0, "page": 18, "width": 612.0}, {"height": 792.0, "page": 19, "width": 612.0}, {"height": 792.0, "page": 20, "width": 612.0}, {"height": 792.0, "page": 21, "width": 612.0}, {"height": 792.0, "page": 22, "width": 612.0}, {"height": 792.0, "page": 23, "width": 612.0}, {"height": 792.0, "page": 24, "width": 612.0}, {"height": 792.0, "page": 25, "width": 612.0}, {"height": 792.0, "page": 26, "width": 612.0}, {"height": 792.0, "page": 27, "width": 612.0}, {"height": 792.0, "page": 28, "width": 612.0}, {"height": 792.0, "page": 29, "width": 612.0}, {"height": 792.0, "page": 30, "width": 612.0}, {"height": 792.0, "page": 31, "width": 612.0}, {"height": 792.0, "page": 32, "width": 612.0}, {"height": 792.0, "page": 33, "width": 612.0}, {"height": 792.0, "page": 34, "width": 612.0}, {"height": 792.0, "page": 35, "width": 612.0}, {"height": 792.0, "page": 36, "width": 612.0}, {"height": 792.0, "page": 37, "width": 612.0}, {"height": 792.0, "page": 38, "width": 612.0}, {"height": 792.0, 
"page": 39, "width": 612.0}, {"height": 792.0, "page": 40, "width": 612.0}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 0e1a7d9e..3dd94682 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -122,6 +122,10 @@ def verify_md(doc_pred_md, doc_true_md): return doc_pred_md == doc_true_md +def verify_dt(doc_pred_dt, doc_true_dt): + return doc_pred_dt == doc_true_dt + + def verify_conversion_result( input_path: Path, doc_result: ConversionResult, generate=False ): @@ -134,10 +138,12 @@ def verify_conversion_result( doc_pred_pages: List[Page] = doc_result.pages doc_pred: DsDocument = doc_result.output doc_pred_md = doc_result.render_as_markdown() + doc_pred_dt = doc_result.render_as_doctags() pages_path = input_path.with_suffix(".pages.json") json_path = input_path.with_suffix(".json") md_path = input_path.with_suffix(".md") + dt_path = input_path.with_suffix(".doctags.txt") if generate: # only used when re-generating truth with open(pages_path, "w") as fw: @@ -148,6 +154,9 @@ def verify_conversion_result( with open(md_path, "w") as fw: fw.write(doc_pred_md) + + with open(dt_path, "w") as fw: + fw.write(doc_pred_dt) else: # default branch in test with open(pages_path, "r") as fr: doc_true_pages = PageList.validate_json(fr.read()) @@ -158,6 +167,9 @@ def verify_conversion_result( with open(md_path, "r") as fr: doc_true_md = fr.read() + with open(dt_path, "r") as fr: + doc_true_dt = fr.read() + assert verify_cells( doc_pred_pages, doc_true_pages ), f"Mismatch in PDF cell prediction for {input_path}" @@ -173,3 +185,7 @@ def verify_conversion_result( assert verify_md( doc_pred_md, doc_true_md ), f"Mismatch in Markdown prediction for {input_path}" + + assert verify_dt( + doc_pred_dt, doc_true_dt + ), f"Mismatch in DocTags prediction for {input_path}" From 8df7afa5509096630a1a3b93ec6a0cddfd4618fa Mon Sep 17 00:00:00 2001 From: Peter Staar 
Date: Sat, 21 Sep 2024 06:43:37 +0200 Subject: [PATCH 4/8] updated the README Signed-off-by: Peter Staar --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0cb0c95c..2631d3ca 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,9 @@ from docling.document_converter import DocumentConverter source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL converter = DocumentConverter() result = converter.convert_single(source) + print(result.render_as_markdown()) # output: "## Docling Technical Report[...]" +print(result.render_as_doctags()) # output: "<page_1><loc_20>..." ``` ### Convert a batch of documents From 5285017ba9e36ba1f303922b33a67afdd0871418 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <dol@zurich.ibm.com> Date: Mon, 23 Sep 2024 15:38:44 +0200 Subject: [PATCH 5/8] fix poetry lock Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --- poetry.lock | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/poetry.lock b/poetry.lock index 2f9bb4cb..423f1f7b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -863,18 +863,26 @@ files = [ {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:c69e055b98d0a22267a1d0b6139801aecc5b7386289b89f53f976ab723352728"}, {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3eaa245e5ac4ab3e9d0c95a93e23f58d61d70f11431b76b6705fae358eb31c62"}, {file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:63d195f6c5b30f4f908436589cffd4a5b9e18553c44c57fb635068a2afbd7fab"}, + {file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c9296a2e417a30bf030de0c7c2e2cce4773c58bead039d5e6fccbf7deb2269"}, + {file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:166b9958d3a8a98d0671a1e3fdf8083ded9ccf12c2ab80fb9709908a2cf81784"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_arm64.whl", hash = 
"sha256:13bea2b4e8c04647ec743c3feb1ee66c784db542ab9dbed8dad7eb66fca74b70"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:c5b8b8e2207615ff99e535f00548c7b0b8e4ca4593e59edd83fcad98fc318284"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ba74868243caf5ac850fff7c45c8a372c1cac0193431e22eb41888d45ac79719"}, {file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7815b06aa1c3953488496f191ce0265d0ee7bed5a6b96454a5f9d6f1add28f69"}, + {file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a7dd2a1e63cee47f6090ebfebc15f68d24f61d5f4f45a21f22120b2267798d"}, + {file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52bd2934a27fdc9db5f2d0713dbeec0c94e5c5843d29996e85d641a11498ad0"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:fd4d0d4ff853e566b05769c704a4ea3c050c0cfc5721e4e2035e550fb2a8fe91"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:802a59a8a3bea1801bce848d58d19fcdbbcea27d9e2c23f163419d13cdec2345"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1ead7958bc044000a8d43cce53c9b82be0d341b0ca5cf7b39a0c09f9c4fd8ceb"}, {file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:312cf2b0b6560c8dfe5331a5a80a0ed5cb409d29ee6cc999a81696774d50f5e7"}, + {file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc3d6f6ca2cffbe5e112818c8aba9a783af8ab7cffff04624bfb5bf8d185b707"}, + {file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc537d5e9d108233b7e7249c6739292dc9c36a0f39c11e7f430700df35ff884"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:4db0a700c08ff2d6285461dc5f4a68ccd36876a59b62131f847dc4be76a85989"}, {file = 
"deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:f1041c44d1a4d1a43a324781795b03edfdfd8076c49a610c4dd384c86f2a6236"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:efb0e9678fe07640bd9b6dc07651eaf1f8e5d5602e379b4cf78dbcddc62b50e9"}, {file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f8d46922d74339ec7fd7a6933220ebc36b2ff39738ad9bb74ea55a198dd31b2f"}, + {file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2872de101ce6d262f57afd3f4d68452064c214c5ab001b7ac698a948e0725314"}, + {file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187da7dabc11317badbf6983ee508c367299eb39ed78938623206be6b21e41bd"}, ] [package.dependencies] From e57e223ab7062f8d345768ba49b153cf4e0b5fc1 Mon Sep 17 00:00:00 2001 From: Christoph Auer <cau@zurich.ibm.com> Date: Mon, 23 Sep 2024 16:34:32 +0200 Subject: [PATCH 6/8] Fix formatting problems Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --- docling/datamodel/document.py | 2 +- tests/verify_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index fe0507b8..2f086b23 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -389,7 +389,7 @@ def render_as_doctags( # table specific flags add_table_cell_location=add_table_cell_location, add_table_cell_label=add_table_cell_label, - add_table_cell_text=add_table_cell_text + add_table_cell_text=add_table_cell_text, ) def render_element_images( diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 3dd94682..f75ed614 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -156,7 +156,7 @@ def verify_conversion_result( fw.write(doc_pred_md) with open(dt_path, "w") as fw: - fw.write(doc_pred_dt) + fw.write(doc_pred_dt) else: # default branch in test with open(pages_path, "r") as fr: 
doc_true_pages = PageList.validate_json(fr.read()) @@ -168,7 +168,7 @@ def verify_conversion_result( doc_true_md = fr.read() with open(dt_path, "r") as fr: - doc_true_dt = fr.read() + doc_true_dt = fr.read() assert verify_cells( doc_pred_pages, doc_true_pages @@ -188,4 +188,4 @@ def verify_conversion_result( assert verify_dt( doc_pred_dt, doc_true_dt - ), f"Mismatch in DocTags prediction for {input_path}" + ), f"Mismatch in DocTags prediction for {input_path}" From 08cee99cb6a848cf7a7a4a8f416f589c760a7c3b Mon Sep 17 00:00:00 2001 From: Peter Staar <taa@zurich.ibm.com> Date: Mon, 23 Sep 2024 18:16:56 +0200 Subject: [PATCH 7/8] fixed the doctag export in docling/utils/export.py Signed-off-by: Peter Staar <taa@zurich.ibm.com> --- docling/utils/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/utils/export.py b/docling/utils/export.py index e9e56930..033c6d65 100644 --- a/docling/utils/export.py +++ b/docling/utils/export.py @@ -111,7 +111,7 @@ def _process_page(): ) # No page-tagging since we only do 1 page at the time content_dt = doc.export_to_document_tokens( - main_text_start=start_ix, main_text_stop=end_ix, page_tagging=False + main_text_start=start_ix, main_text_stop=end_ix, add_page_index=False ) return content_text, content_md, content_dt, page_cells, page_segments, page From 43f8b9182d24f6ae2e794f9de3baab394e7a9c2b Mon Sep 17 00:00:00 2001 From: Michele Dolfi <dol@zurich.ibm.com> Date: Mon, 23 Sep 2024 19:17:53 +0200 Subject: [PATCH 8/8] propagate xsize and ysize Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --- docling/datamodel/document.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 2f086b23..c6bdfcb2 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -383,6 +383,8 @@ def render_as_doctags( main_text_start=main_text_start, main_text_stop=main_text_stop, main_text_labels=main_text_labels, + xsize=xsize, + 
ysize=ysize, add_location=add_location, add_content=add_content, add_page_index=add_page_index,