diff --git a/poetry.lock b/poetry.lock index 0f1021f..ad02d49 100644 --- a/poetry.lock +++ b/poetry.lock @@ -740,24 +740,24 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "cloudpathlib" -version = "0.18.1" +version = "0.19.0" description = "pathlib-style classes for cloud storage services." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "cloudpathlib-0.18.1-py3-none-any.whl", hash = "sha256:20efd5d772c75df91bb2ac52e053be53fd9000f5e9755fd92375a2a9fe6005e0"}, - {file = "cloudpathlib-0.18.1.tar.gz", hash = "sha256:ffd22f324bfbf9c3f2bc1bec6e8372cb372a0feef17c7f2b48030cd6810ea859"}, + {file = "cloudpathlib-0.19.0-py3-none-any.whl", hash = "sha256:eb7758648812d5906af44f14cf9a6a64f687342a6f547a1c20deb7241d769dcb"}, + {file = "cloudpathlib-0.19.0.tar.gz", hash = "sha256:919edbfd9a4e935d2423da210b143df89cb0ec6d378366a0dffa2e9fd0664fe8"}, ] [package.dependencies] -boto3 = {version = "*", optional = true, markers = "extra == \"s3\""} +boto3 = {version = ">=1.34.0", optional = true, markers = "extra == \"s3\""} typing_extensions = {version = ">4", markers = "python_version < \"3.11\""} [package.extras] all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"] -azure = ["azure-storage-blob (>=12)"] +azure = ["azure-storage-blob (>=12)", "azure-storage-file-datalake (>=12)"] gs = ["google-cloud-storage"] -s3 = ["boto3"] +s3 = ["boto3 (>=1.34.0)"] [[package]] name = "colorama" @@ -1446,12 +1446,12 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" @@ -2475,44 +2475,6 @@ Scrapy = ">=1.1.0" six = ">=1.10.0" warcio = ">=1.3.3" -[[package]] -name = "news-please" -version = "1.6.13" -description = "news-please is an open source easy-to-use news extractor that just works." -optional = false -python-versions = "*" -files = [ - {file = "news_please-1.6.13-py3-none-any.whl", hash = "sha256:0c378cc0c388cb5051e22493665bb714ab7ca057cf1c568ed015b1adbccbf373"}, - {file = "news_please-1.6.13.tar.gz", hash = "sha256:4ccbe8d947fd77eca7c45b958bcdbf3a49b760493ff68a74e13b77adf36209b7"}, -] - -[package.dependencies] -ago = ">=0.0.9" -beautifulsoup4 = ">=4.3.2" -boto3 = "*" -bs4 = "*" -dotmap = ">=1.2.17" -elasticsearch = ">=2.4" -faust-cchardet = ">=2.1.18" -hjson = ">=1.5.8" -"hurry.filesize" = ">=0.9" -langdetect = ">=1.0.7" -lxml = ">=3.3.5" -lxml-html-clean = ">=0.1.1" -newspaper4k = ">=0.9.3.1" -plac = ">=0.9.6" -psycopg2-binary = ">=2.8.4" -PyDispatcher = ">=2.0.5" -PyMySQL = ">=0.7.9" -python-dateutil = ">=2.4.0" -pywin32 = {version = ">=220", markers = "sys_platform == \"win32\""} -readability-lxml = ">=0.6.2" -redis = "*" -Scrapy = ">=1.1.0" -six = ">=1.10.0" -typing-extensions = ">=4.7.0" -warcio = ">=1.3.3" - [[package]] name = "newspaper3k" version = "0.2.8" @@ -2539,42 +2501,6 @@ requests = ">=2.10.0" tinysegmenter = "0.3" tldextract = ">=2.0.1" -[[package]] -name = "newspaper4k" -version = "0.9.3.1" -description = "Simplified python article discovery & extraction." -optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = "newspaper4k-0.9.3.1-py3-none-any.whl", hash = "sha256:42a03b7915d92941a9fe4cc8dab47240219560e0cb8ecb5a291dc5a913eb8aa4"}, - {file = "newspaper4k-0.9.3.1.tar.gz", hash = "sha256:fc237ae6a7b65d5ac4df224f962b2d7368c991fdf63b5176e439a1b74a2992e0"}, -] - -[package.dependencies] -beautifulsoup4 = ">=4.9.3" -feedparser = ">=6.0.0" -lxml = ">=4.2.0" -nltk = ">=3.6.6" -numpy = {version = ">=1.25", markers = "python_version >= \"3.11\""} -pandas = {version = ">=2.1.0", markers = "python_version >= \"3.11\""} -Pillow = ">=4.0.0" -python-dateutil = ">=2.6.1" -PyYAML = ">=5.1" -requests = ">=2.26.0" -tldextract = ">=2.0.1" - -[package.extras] -all = ["cloudscraper (>=1.2.0)", "gnews (>=0.3.6)", "indic-nlp-library (>=0.90)", "jieba (>=0.42.1)", "pythainlp (>=2.3.2)", "tinysegmenter (>=0.4)"] -bn = ["indic-nlp-library (>=0.90)"] -cloudflare = ["cloudscraper (>=1.2.0)"] -gnews = ["gnews (>=0.3.6)"] -hi = ["indic-nlp-library (>=0.90)"] -ja = ["tinysegmenter (>=0.4)"] -np = ["indic-nlp-library (>=0.90)"] -ta = ["indic-nlp-library (>=0.90)"] -th = ["pythainlp (>=2.3.2)"] -zh = ["jieba (>=0.42.1)"] - [[package]] name = "nltk" version = "3.9.1" @@ -2739,9 +2665,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3267,6 +3193,19 @@ files = [ {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, + {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, + {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, + {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, ] [package.dependencies] @@ -3326,8 +3265,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -3945,24 +3884,6 @@ lxml = "*" [package.extras] test = ["timeout-decorator"] -[[package]] -name = "redis" -version = "5.0.8" -description = "Python client for Redis database and key-value store" -optional = false -python-versions = ">=3.7" -files = [ - {file = "redis-5.0.8-py3-none-any.whl", hash = "sha256:56134ee08ea909106090934adc36f65c9bcbbaecea5b21ba704ba6fb561f8eb4"}, - {file = "redis-5.0.8.tar.gz", hash = "sha256:0c5b10d387568dfe0698c6fad6615750c24170e548ca2deac10c649d463e9870"}, -] - -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} - -[package.extras] -hiredis = ["hiredis (>1.0.0)"] -ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] - [[package]] name = "regex" version = "2024.7.24" @@ -4885,4 +4806,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "6d8ae32d68899a069364bd00d68981b0747fb982c6df5b091c54a9a172238e87" +content-hash = "ace5618ff922a62fd73e74c226687deb51d1df907716ab3390ff3b6539095bd2" diff --git a/pyproject.toml b/pyproject.toml index 6fcc609..d4bf5a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ playwright = "^1.35.0" readability-lxml = "^0.8.1" bleach = "^6.1.0" python-json-logger = "^2.0.4" -cloudpathlib = { extras = ["s3"], version = "^0.18.1" } +cloudpathlib = { extras = ["s3"], version = "^0.19.0" } PyMuPDF = "^1.24.9" google-cloud-translate = "^3.15.5" psutil = "^6.0.0"