diff --git a/tests/tests/python-language-html2parquet/pail/data/requirements.txt b/tests/tests/python-language-html2parquet/pail/data/requirements.txt
index dce2e58d..29b09e63 100644
--- a/tests/tests/python-language-html2parquet/pail/data/requirements.txt
+++ b/tests/tests/python-language-html2parquet/pail/data/requirements.txt
@@ -1,6 +1,5 @@
trafilatura==1.12.2
-lxml-html-clean==0.2.2
+lxml_html_clean
-# we can probably update to 18+, but we will have to re-generate expected output as pyarrow 18 seems to have resulted in a binary format change
-pyarrow<18
+pyarrow
diff --git a/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz b/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz
index f99747d5..7919cbe5 100644
Binary files a/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz and b/tests/tests/python-language-html2parquet/pail/test-data/expected/html_zip.parquet.gz differ
diff --git a/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz b/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz
index b231835b..d457dde8 100644
Binary files a/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz and b/tests/tests/python-language-html2parquet/pail/test-data/expected/test1.parquet.gz differ