diff --git a/tests/tests/python-language-html2parquet/pail/data/requirements.txt b/tests/tests/python-language-html2parquet/pail/data/requirements.txt index dce2e58d..29b09e63 100644 --- a/tests/tests/python-language-html2parquet/pail/data/requirements.txt +++ b/tests/tests/python-language-html2parquet/pail/data/requirements.txt @@ -1,6 +1,5 @@ trafilatura==1.12.2 -lxml-html-clean==0.2.2 +lxml_html_clean -# we can probably update to 18+, but we will have to re-generate expected output as pyarrow 18 seems to have resulted in a binary format change -pyarrow<18 +pyarrow diff --git a/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz b/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz index f99747d5..7919cbe5 100644 Binary files a/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz and b/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz differ diff --git a/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz b/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz index b231835b..d457dde8 100644 Binary files a/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz and b/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz differ