From fa9699fa3cd2d367382d7b952d0365983a870848 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos <100353117+nikos-livathinos@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:50:35 +0200 Subject: [PATCH] fix(tests): Adjust the test data to match the new version of LayoutPredictor (#82) * fix(tests): Adjust the test data to match the new version of LayoutPredictor from docling-ibm-models Signed-off-by: Nikos Livathinos * chore: Update poetry to use `docling-ibm-models` at version `v1.2.0` Signed-off-by: Nikos Livathinos --------- Signed-off-by: Nikos Livathinos --- poetry.lock | 15 +++++---------- pyproject.toml | 2 +- tests/data/redp5110.md | 10 ++++++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/poetry.lock b/poetry.lock index b8459ad6..efe729fd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -977,13 +977,13 @@ tabulate = ">=0.9.0,<0.10.0" [[package]] name = "docling-ibm-models" -version = "1.1.7" +version = "1.2.0" description = "This package contains the AI models used by the Docling PDF conversion package" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "docling_ibm_models-1.1.7-py3-none-any.whl", hash = "sha256:a118c9a3b7cdcd6ee6190f6475cb56ca1a1a52f7e6632932cf9d5de5217098aa"}, - {file = "docling_ibm_models-1.1.7.tar.gz", hash = "sha256:d4699f24e32c314ecd6fe89e9067f1abe8808f83dd9c13d05d423361e92edc7d"}, + {file = "docling_ibm_models-1.2.0-py3-none-any.whl", hash = "sha256:1bad8fb67ab1ff71a6120530c76272e48b71c5829383d381927e8e51c2204eee"}, + {file = "docling_ibm_models-1.2.0.tar.gz", hash = "sha256:e5558c66433603a7acfe0dd9e7bc12e99680af9484b26cf3e61e03b1cbdd3e2d"}, ] [package.dependencies] @@ -6610,11 +6610,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -7228,4 +7223,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "f50b5f6158b688cc25f80253e3cec8e60d852d66a90fe8eb96798ea3c2372019" +content-hash = "7a7b6ef730f468cc3d1c3054c6638362d5e50cfef48d6e76ea3a4fe534dd1ccd" diff --git a/pyproject.toml b/pyproject.toml index 1a2c0a0a..128020cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ packages = [{include = "docling"}] python = "^3.10" pydantic = "^2.0.0" docling-core = "^1.3.0" -docling-ibm-models = "^1.1.7" +docling-ibm-models = "^1.2.0" deepsearch-glm = "^0.21.0" filetype = "^1.2.0" pypdfium2 = "^4.30.0" diff --git a/tests/data/redp5110.md b/tests/data/redp5110.md index 7b518f6f..0d064ac8 100644 --- a/tests/data/redp5110.md +++ b/tests/data/redp5110.md @@ -1776,6 +1776,10 @@ An important design and implementation consideration is the fact that RCAC colum An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values. +SELECT + +FROM GROUP BY ORDER BY + ## Without RCAC Masking ## With RCAC Masking @@ -1808,6 +1812,12 @@ Figure 6-1 Timing of column masking | **** **** **** 1234 | 750.33 | | **** **** **** 0001 | 10.00 | +CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS + +CREDIT_CARD_NUMBER + +CREDIT_CARD_NUMBER; + Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. This difference in behavior is shown in Figure 6-2. Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.