From 36682c1738bc8b25209eb79771d0616832c94d85 Mon Sep 17 00:00:00 2001 From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:12:38 -1000 Subject: [PATCH] [data][api] implement `HudiDataSource` (#46273) Support read from Hudi table into Ray dataset. --------- Signed-off-by: Shiyan Xu <2701446+xushiyan@users.noreply.github.com> --- .../config/vocabularies/Data/accept.txt | 1 + doc/source/data/api/input_output.rst | 9 ++ python/ray/data/BUILD | 8 ++ python/ray/data/__init__.py | 2 + .../_internal/datasource/hudi_datasource.py | 91 ++++++++++++++ .../data/hudi-tables/0.x_cow_partitioned.zip | Bin 0 -> 46757 bytes python/ray/data/read_api.py | 53 ++++++++ python/ray/data/tests/test_hudi.py | 114 ++++++++++++++++++ .../ml/data-test-requirements.txt | 3 +- python/requirements_compiled.txt | 2 + 10 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 python/ray/data/_internal/datasource/hudi_datasource.py create mode 100644 python/ray/data/examples/data/hudi-tables/0.x_cow_partitioned.zip create mode 100644 python/ray/data/tests/test_hudi.py diff --git a/.vale/styles/config/vocabularies/Data/accept.txt b/.vale/styles/config/vocabularies/Data/accept.txt index 8ec78bd70bce..1104d6f3cd41 100644 --- a/.vale/styles/config/vocabularies/Data/accept.txt +++ b/.vale/styles/config/vocabularies/Data/accept.txt @@ -7,6 +7,7 @@ Data('s)? [Dd]iscretizer(s)? dtype [Gg]roupby +[Hh]udi [Ii]ndexable [Ii]ngest [Ii]nqueue(s)? diff --git a/doc/source/data/api/input_output.rst b/doc/source/data/api/input_output.rst index bb8d791d98b2..51bd7ecedb13 100644 --- a/doc/source/data/api/input_output.rst +++ b/doc/source/data/api/input_output.rst @@ -186,6 +186,15 @@ Delta Sharing read_delta_sharing_tables +Hudi +---- + +.. 
autosummary:: + :nosignatures: + :toctree: doc/ + + read_hudi + Iceberg ------- diff --git a/python/ray/data/BUILD b/python/ray/data/BUILD index d232ab352ba0..21b4e0d829b6 100644 --- a/python/ray/data/BUILD +++ b/python/ray/data/BUILD @@ -225,6 +225,14 @@ py_test( deps = ["//:ray_lib", ":conftest"], ) +py_test( + name = "test_hudi", + size = "small", + srcs = ["tests/test_hudi.py"], + tags = ["team:data", "exclusive"], + deps = ["//:ray_lib", ":conftest"], +) + py_test( name = "test_image", size = "small", diff --git a/python/ray/data/__init__.py b/python/ray/data/__init__.py index 89d531aa2ee5..5883ae6c542c 100644 --- a/python/ray/data/__init__.py +++ b/python/ray/data/__init__.py @@ -48,6 +48,7 @@ read_databricks_tables, read_datasource, read_delta_sharing_tables, + read_hudi, read_iceberg, read_images, read_json, @@ -139,6 +140,7 @@ "read_csv", "read_datasource", "read_delta_sharing_tables", + "read_hudi", "read_iceberg", "read_images", "read_json", diff --git a/python/ray/data/_internal/datasource/hudi_datasource.py b/python/ray/data/_internal/datasource/hudi_datasource.py new file mode 100644 index 000000000000..828d9baada7f --- /dev/null +++ b/python/ray/data/_internal/datasource/hudi_datasource.py @@ -0,0 +1,91 @@ +import logging +import os +from typing import Dict, Iterator, List, Optional + +from ray.data._internal.util import _check_import +from ray.data.block import BlockMetadata +from ray.data.datasource.datasource import Datasource, ReadTask + +logger = logging.getLogger(__name__) + + +class HudiDatasource(Datasource): + """Hudi datasource, for reading Apache Hudi table.""" + + def __init__( + self, + table_uri: str, + storage_options: Optional[Dict[str, str]] = None, + ): + _check_import(self, module="hudi", package="hudi-python") + + self._table_uri = table_uri + self._storage_options = storage_options + + def get_read_tasks(self, parallelism: int) -> List["ReadTask"]: + import pyarrow + from hudi import HudiTable + + def _perform_read( + table_uri: 
str, + base_file_paths: List[str], + options: Dict[str, str], + ) -> Iterator["pyarrow.Table"]: + from hudi import HudiFileGroupReader + + for p in base_file_paths: + file_group_reader = HudiFileGroupReader(table_uri, options) + batch = file_group_reader.read_file_slice_by_base_file_path(p) + yield pyarrow.Table.from_batches([batch]) + + hudi_table = HudiTable(self._table_uri, self._storage_options) + + reader_options = { + **hudi_table.storage_options(), + **hudi_table.hudi_options(), + } + + schema = hudi_table.get_schema() + read_tasks = [] + for file_slices_split in hudi_table.split_file_slices(parallelism): + if len(file_slices_split) == 0: + # when the table is empty, this will be an empty split + continue + + num_rows = 0 + relative_paths = [] + input_files = [] + size_bytes = 0 + for file_slice in file_slices_split: + # A file slice in a Hudi table is a logical group of data files + # within a physical partition. Records stored in a file slice + # are associated with a commit on the Hudi table's timeline. 
+ # For more info, see https://hudi.apache.org/docs/file_layouts + num_rows += file_slice.num_records + relative_path = file_slice.base_file_relative_path() + relative_paths.append(relative_path) + full_path = os.path.join(self._table_uri, relative_path) + input_files.append(full_path) + size_bytes += file_slice.base_file_size + + metadata = BlockMetadata( + num_rows=num_rows, + schema=schema, + input_files=input_files, + size_bytes=size_bytes, + exec_stats=None, + ) + + read_task = ReadTask( + read_fn=lambda paths=relative_paths: _perform_read( + self._table_uri, paths, reader_options + ), + metadata=metadata, + ) + read_tasks.append(read_task) + + return read_tasks + + def estimate_inmemory_data_size(self) -> Optional[int]: + # TODO(xushiyan) add APIs to provide estimated in-memory size + return None diff --git a/python/ray/data/examples/data/hudi-tables/0.x_cow_partitioned.zip b/python/ray/data/examples/data/hudi-tables/0.x_cow_partitioned.zip new file mode 100644 index 0000000000000000000000000000000000000000..9f78c06de94524454782e38dcc6bd71418f5aaa6 GIT binary patch literal 46757 zcmb@u19)ZG@;)4PCmq|i%?>(A$5zL-ZFOvRY}-c1wr$(?KRt72=1$+4i{JO{=UF-X z?3`0oYp*)1-g>LfkrD+4`2g_an9}i5{N>~ye;@%c0c@>}ENwJwwX{w3XyoO<0f5^+ zLF%(=LF(H%LI41QTmu3Cfc$v5)bCPiVSWE31@R{-I)-}Y=2}L-O6$#p(I0vne<2O= zO)A(gHcfLb@F=`J(|jA$evyYvSpJh9 z&LKMJuegu^ZOvtd8}CJeA;7&ONzMQ>3GF|D6}+-ght)074=^$vv(y@IQ-0Cxm+UqZ zBNXW6A$9EF)YH|55wsAeGATM|aMJGOt*iulQYq{#-CG0waWj53fBNl~s)UjxT7v@s zROh|}Q2874nOSuibQl=fsPviX>8KcWbXlp`^jNj2beMH@S@pE(X?1B?sc1DAsp#n6 z9_VT58EF}5>FMYhXc?I385pSFrfp@XXZy=s+y8onj+fm`)%4H+&L(9<(7aja8fs}f z^NY1a29J-;_VxJ}DvZwwAf*XNx80M?6&?WV>N*Hf5fCap1yF6+xk(cGz}Da+yL=6K zZNYe?`9jbc4f|eTz1CLUQx?ly^p2O}R-imuDUV%NFS$mYMjlfyQ*-+#iXO^NAic{; zzB4~JIRGjguHXyV=et?c{e%q4}>$#ZAftNTpkt`VgA z1Mh9vJV(j8=H0x%$LsMVOeBfNLT#S%jp@pFJA<=Yv&A$eW&|eTq#TUD>m+5jGHzT-oNv( zT&{EAcs&!makrS)C{?|g*}9>jZd%*8EWNuHt!ueh#%Vt6g;9O%^GlFzAv=P?aoM_8 zh#g8U-(QtGkbqyy38_lGnMLQR`mY;qF?PvgGYWj)85C^)LnM+%i7m@i!H?U2#98q%nLq 
z(#F>a@UubC(ea(IETBa$`LI%#i7Cg`@?_Ubl9kj;QuscZlO-K>n>grx9V2Ua+8@Ej zf72%JLrzDR;}W@9A$Fyyd7qFLBRck|+y_HNB*xh|Vgo-WF>$1eX{$U58Z|q~%^~Yy zJF37=3<3IXLuQ^*sl*~G>D9{2?ekj4B;|#PWmLu*N>(^6zD%x2Nz(!olLM7CJdJb; zI8|H4&5dsG!H?2zsspF7{()T(20ruY$0v;+0MHr%2BJ23(KPG+ZGf)EXlVDdG_-+qEJ{?oKJ(AgKq-ia8{|Vt;mvtQd{4O zqd3WDB8DF)d5G;|SYf@tmQOQLKrNenHt;p^u3d(TvB7f1kWAZYXzcQAbkx+iYznvA z4W-%GDaf~7g6~Fg?wFS291o8{x%RE&>dr7ILbyRzqLhIB8;c z56<=03GB`Zlxs?cO`|SZo=P1Fl3ccjSq5014j&6`UQ0L>Jh+-%J1w}Iqnz#QQbwDf zuUZBmswT+Gy4#zl@9T&*o|;C<%DQ9k4CaU!205B-MyYMNxW*KmmcJ-(Z@s_$LgJC0 z{3JQquc0neGqxSq?z&Gy!xrSW=D$7E6x%x)btf*H*-}Dwp2QH_%QA0hbNSgyM{p-s z40DR^dEz*r%!z)*Em89LK*+)N-Ea{MI-S$y-V(!@baH);4kfwXMS@K3{YuZGvS^Y; zkn(J;L~M|3gUn%f34yO6BlmpcP2-~SAcnZBp=X@}M=87l>ME)Sb{aa?u&&@{3#+WH z)!~w`^6ZNJ<1q?Ntm(tB&CxtLO`y)`_37h%#ctASRXCTtQ6>CG_VdQR$f3Qhjf`#a z-91T;=PqowyCDRptMl#NYWS42cyfQtmzub=P6E#RUJIDSRr)6D`SHNOii?Ms%neI6 zQPtQ{f+*DK)XQygWMr*+*iSM}-1S|f6ey?E%vK%7JLD)QXo%&;2rjZNTR3FH@}<9P-HfvD#l<|M?nIRmE@m zNt&+zY@Flf+^>3nded6DX|dfKWcrH(v%>Q~Sq1=*yv97!_!n;m2%G!!yxK6j{QfKs z0=4ir&B>O(NeO%3y7SlO{HkP)kV)>h`Qu{U-6RO!}K^B&9#i zGZ}Vfx)I>cgZ6h#zrXYAW)BYNPit&!s8#|2u-0ni_V)1Wv6j6-Lka-UW_3Q2&bGuz z7fJKEQ^)swT{}97bjRN!gqg*^K$kp4j6^o2pj21Ep|Hq=l{2{*J+CirOjiR7^0*PZFx^Y(Qj)kqd%GKZ z2M4vp;}OD-S*fI}-_({b?t*YxuUgYzR6^V3pA{TlApUzy{15c{A2IP?<8`J z{uL(bSeTg^*-~5Uy@5p=TRq)>hmHRLu|M*HdfVeC&o@`00tNse`Wt<}LU3wJYYR)g zHx#aC^NaY_H#quZ`~O~j8M<|sn22@J+N->nPVeQ>|?crAwNy{HtIX|HazJue9jVfEK0r$ecy0&ZU3Ch zl&*=d*ByfJ8OF+N@JJNkitAd$bv^;N;I)g$iP=1nPIK9Ed+}WyAFRKeH{L}j zshCL+_fJYU**~ShvtvEoc~+8*HqB` z_&Wdq%D-9P-~EY^k&TX)nU#V1SAY6tnLobegfGeelF8-BsaPz$E%RyFCwF4CKC`i2 z8Dcy$c%VW~Sj2`K;fjN~gw1sNE{`-FAVNyh$%p7Y*EpLeERx42b(n2Mquc~U(R(#I zD2PxyPiL6sW*#VY&B*Kz5FH=ziyjIEAYFb_kKYM;2Dlf*aJ)39iSIA$T1+OFU6(4yCmkF~Po-AjA5T;WA0 zOA?BV&;XOph3Uo8nf0=&IJF%5r=Dt1eomOzE$&TMatDI8TQ-|Xvk78af)(cn6wwH* zo{Cp#-z-H2b!QFgfbRKBR#18!v0cSJaYHA0m}2)XY~e*?`w%lguG)kH*qsi7M$DLv z7~PP?pxwPAjkMR&aCw(TKU_7cu2ic>rhtByMC82=HJeBKR>@#yWBqiptQEycNMJv* 
zk9sAfI|NhbX&sOJ2eSi()p5O2(Dn^N4oO?^yhcotxyxpg4nXc z_|J(Ax$!8PgJ-2kFYnNku-0}IB$y-Sffjm&POSsR`9h-AWI`V3;8{P;RBJm$8a{R4 zHFQ8sUI;A+38O8s)W=5CuT|5AOt1o7Cvs@F8#+X=67b}pSGBew}~+REA@07FKV&K z3fp^nkLWj&i!Ti$%Sk*8)NYT$AEgarg*&!Iibo!)93_$j^|UNXc44zUWw-zY?phJ* z^yHts9uEKH$cXO5H2p;}V=LxC5CS4DR(UgYB@{bl_8uz6DWgEQgLZnM%q9F1DbvSh z1-%&@T%Qn+EQl0FN-X^oV-DU1sy_le38Z!qki~bQl7i+#c$Ta1+i(5ApO;9WEit>nJ8o}$ zmK`qem7nmtR7D@N7abxO9i(30%8(4oB1|F#hBHv6XwEUoDCE8oUs{^iPRDt#a%6Vk zAytRc7QM50(R>NRqP>YOFRSkf)2kZvy4Dbpw{zAq;?N}_(#-`!wIgS=JY^3@4{Mb% zU@b7eix+s4Uwggx^mB&^A$69} ztfcb|s4v@0W&;K*>VTEBRv>s>WMDy?`1~i#1F+djLTtTCZT-*!F2|eCgco4G>loV% zPeiWN`VN{l_0wfZ;`Z-lWGa%xZsmd%5e@Q%DqP;{L1#ktK~_ZD05M>mX~qw!-#c=9 zatPqLC#gn3zPeJQ*nE^?TiR0)*_M7pcA3JyxNbqNW*kdcFPgrhDW4ktTKZ)d%>lb` zWQ|jT^HHjE%7x|HmL(e)9FUH;kk>pD3%xdooV;?t8oS>3M=DJ7=Hi-lDEB?ztnrz;SC6az5#`w$=H7Z zp1%VHBXfOIBLl-<2}<_Ah6=~gqN1Ij5Pff-k?;=RO$ehGiY|hI`i^~34TwJ65_nI- z(7E-+bgsK%d_6rL zt8vTaeFc+=N)e&t*c4R1XP9w-WxFV4;RZOTeim#cq5E^Yq&0=hNvbN} zsU)Q`C0l8UVBIGf<)M@xGQSZ=l&)_XmF*+!%osnuMZ6EI7H>;lfZ?2LVbE7Y<%Xf+ zxS+gP-GjR3lhdkCoqC=Z<-jm-ovPuNA~byJ7LI0OqTt6R!hysoz2F)KAuSSlt9u1IdQ2_1}mu}-V+`*w*dZqEWrG`SYY$U;hSmwml5XYs=|MC zn%}qnKRL}0_Wmyz{J*yz>u;>rveq#)ve(o7Kbrq{Vt*?){3`ZG!t-zO!_QTczssq9 z7yH|1>ld*7Z^|Zrog#|;&CU21@w9(F=xC|mGAdiGzf2teCbRovzxYx8z~O$~6MPQ< zkbn;W!291?@q4E6yD$8c6~EkpA6VeW_zBo7E3I0fv%$KbYG|EXJHW#lCdmz`z{?+TEo3Q4$rNmnQA8a9Za<_&TS@~v!wNI&gZfCR3pownb#xg zIAzsqDfpo;ub3W5a@e&ToSmOqHd%oom{PHaGM323cPRQH|E5mshqv`m_k9#N!w%hI8+|U!sAS=1RMawBZihqibbPgVwrCyiXrVlqa4tE} z5k%m#%4>J~6s7kU9(p2t$yP6BR!@Vrg$ZPvv>Az7Vk{Ck$!S?5TEqK^S#m}Bw>s{* zD&iyhIJS$I%VHed16$PZ=Ogo=w}?U~;l{3^|u;hc>;IYr`y>5$Z~A%tXiLLfxY1wG`l4Ns(2XHDkF;P6Kr^fk|U>p&?beujxxy%r1Q0w>+-z#ZjZ)C zoXdh=OtR|Z=ao!eaT>FiZDDuHp3)BOn)1g>EKm@wv_#CqwY>TY>=-4m#K*}gsz4m4 zB5EhFkmNmpg#!p6k)Ff$J3jDaBji#fEnp#T9ZHF9Babm=`&}Rt>_>{+2Fw#I6PRZ< z(v>c!z{XySWrv_5>D9}1fC_a$`7964Z)R`V*xRe>-n0ZRp^v&FPN2H3o-z}rqdpu4)Owy4*+IL zF`=TZkh2?%s$scRZHUbj2{2-Wge7*rFy?rkn49s5Rt`0ajHHMSA&g|cK^ag1$c4h^ zePEMv(TF%X>aJS&uQ5^zxzB3^yf4Ln$#m? 
z6)*)4Znd9Dr?**k52(i?P7{a8P&NKoJR&%HRYd6yNBqZBvpbD z12~DQ5JHP8LY2kN>a4+z-bdCRq+$%SZ2CEFjb@@j#MByX-Ah9T-#cy(t0~|U9iQo< zs_~{|5tqwWXFfCZzNZJg9Oi&2IdWHXU{1U@PjiL}Ck{Hf*KBO63*4lBWV&1;8g^@Z zsU-P~U_Spv_?vhpVe&2-jNuh`KOZpi87c-t=#xW7t2TsW&2}ma$4FSDP2UJrn@VX( z!lnZ!>KJaI_H(CzR@Y9K0XFIUS#gFdNiBps6YhLbX^?-vo3YS}m2%eW-pp~PM*V9C zlYC)y!&QuQ7BU>H|QA}Y<~%~SdYv+X~x7bjd|ld!`&MMe{r=w9r51! zJBWkHgBWp3xUBd0O|kJ~x>|yETFMRwrG=1rEpS2i5m*uCnjQ4g7<~x@lLaAu1AwaI z3ov+Vx7DGAnzo%xZFHbp_(5T<=fR~rPr(Kc9;M3Xp)A+ShHG*dn>gx{m|4g8aBI7d z4<1Uvrr~9?a7o(lfx{>=qm*n%m6Cv#^ysvNkPw#w;~t}K0!Mf9mRPXR>1Z7EpiTP6 zUFbcpONsUyllbZ#7!$JR_a2;CaZAhQi0bM#xlAUjLkmyo2Rudm!Vw+*!quHBzXa?!tSee-~$4FgKb&mU^gYMbmR9-}BSl4}po z_VruG)HT#o!h2R}1b5@&cD-kku08%hRX;-bKhrV)HIVqXA^ax>`bWpYkMUD5{{u+; zz>q-oEZ7fkNryh({{STZjAm&63}^lwJ-w_XYJtH5+p?dA8gU{@bsA~nM6x;#fOlRzN&ASgfyq_1k z#A~!Js7e7H^*OBZ)oE7fa*2`golV>1Z=oO8Y*Dl_9V@# zBic^fl}mr=`Yk+)T_$-ULaHK9=f()yi#jVbNgIW?yz`xr-$r#l77X}dBb?KTrWP}x}qSVeb;v={d?6e zQ@mw4@||3-ZREL#a0H%UVj02;U%NCMa_HqCmqaLah3C2vAv?Y*-B%p0-DX$V9h%Ox zLA|=d@v_q|h{D^(<}g}b``~PeJ7GJ9(=pP~%PIkbu7dK@X@b#On)wF~0|tbQyTZh# zINqtoIg8CArI0AIQOD@P+Z)S-6@;Ewj9^m8r0A)3>yCXX@I_TxPsH%z z$dGLt0iS%yuT2x+H-n0n-GcO_699ig_zVC$_eGl}S?yb@q_%5!X&+a!afh}p+bgb6I1WnK6yxQk0Tq_ZP- zfl5Yk=CZ=$J=-?*@r(B%3U}ryH~hR9Wvj%Abw18snN8P8V%0WJat4|$DZ6z2Co^^y zSt+Miu|{Z#q>Y2%qyA6_wO}5WQk^#(1nO{C7y#@cS%g_+Vi?`sPhaW{| zkF9)v>f73jj6DM=XYEu^*LF~m@4h)2 z+127xkb)WljKafI)c4Ns0|YUzne1 zdO=Q!VHI`15_GCks_(P%u&-4M2ymQMoWN*EmmyWE9!4;YdtJtzvWhyBNUT0vx%GId zx+cYJhjF8selAmcOX_uaMW!D|s|)j%C#Gb3idfnn<(1V8?kStBol5`}3oT^unOZE1 zPOV}`qy>Xdlv&ArtaetyWKW{q!WqWbghq~_?unJ!D*K%B216fTZs5zbJy-H zj_>IKrmGjE8gAyCgPluxqLmkzFqgqH%dsyS*E>*R?9T5U7+VgwYE<+J z5ZG-AKDm;GU-<%>&r>je!@nS`XT>Q`J1n*D%(Q84^9XJsOqr(9^=hnbkbWaph~r#f ze3vR`;nOv@$?gc9&$Hfwe>e}{v?UT+8emA959q!3O3|Ef6ypg7{Td_^H9#7ii=`}& zu5P4C`&M;tSh+C5I4BBx3O9Dk`N)MZD@YO_Y>NaT`^B-x*QsJ*3eFp{UgfrJ1X+*X zO7E`&MxY{r4&C@3I(`Uq?s)gi%rm^lIwz*NzXpHuZ=aRVFeP$(n6KC~Psi1fp?j;S zX=>K5oG|?dEg#pDK|yA|1F>kQ5X|b})YQPE9ho5Z0A6xQAEfubkT=&A0ghd3%O@b! 
z7p*40qqDL(KEG~C_v^Ks)pi2|(2eYavDJ2kXwH7>0W(as^swPv%U#pEy1UJ@q_5jp zSp*!-AyZm-`cUI+Iy)$FPNcxcCjJ)v1vCu9%SK+G=s8qZ0CM|69E(Ii~%3RpDMo=t~B(u2M zrkt%QQB+YKLAjfN$mEUIvV$oI^_z0suDxLwhSU8G-+sLJ-dyLFyeU#?h)UAAN*vu> zt>4uRW)DT87oW`RDsX5Utde#4qfmAwtX=BT5Q_*RnMoNz&KS#i&)bPV8j#43+RO!r-2j5BmbVo-M3 zVZDP5$V!xPdtm^4Bty6UamA4|mc z^^NqymFVLdiM^+vgtUmWgu>1mRAlPH0=Yli+?_P<*OKdc!0Kg_po|N_W{>s^z4QlX z9h!-&&NrzL^!xPWk#MWg$aY5%c3bs?ljc;5Mq(LE=^DUcDj2A!@L;@{Z5O7etj@$< za+oU;{9%fi=5p`@i)hk#i1RHU$!wBq6?Gv#g+kgNtNWwSrcM%;E%8Rq^#7m8ncvyn zpUIgY1-T#NrGSzJY)?1Le4nw`&9o0IU~tjF3x9Xtbu-XK6J;zJvvtv zUIu1FRY1RFNk)7Tyy2&YW^5ar93l9IsnEjXpP#40?<%wcyv-XYy**OYi&8#N7AS2^ z3NB|()v4cWw9bZZeRp{(ncZ*Q$3&?H(TZ>)h$0=Brl@6)vlAtY88~oM z!^AERxIb67dU8gS=}c8*L`)=;RnpDQh*Yz@Swp!F4}UF2sFxBcJg`Ql5yURLpq}gt z5izVCaHiRI9Re!oU@z0W04@bAk7cB#ERJi~c&${!hyFIUIG=Sy8P%?q!muk-plc)( zi5^%Hl1T=2av-<1GCrE&2(%p^9E^mZ-=S0Mt0#7vmMRxWORKKseljEfiNBGyD()9* zX7zj7NE*evIlaN>BVq>VrPrZoKibVFE32nT)7%8IPTHJAEulJS9LzL_lsYDV6Hl3v z%oUjXNOLB(#Ioh_qgIQWB&xkmpd^^Yz+J(#r5zqC-6DvAAORytS=QrM4C}`MzQqqg zgel`f3?GM1LNh(+_&mp|BereJHGgn4E7e1tQOng5=HC%mN^Zn|;b@|N;b^G;!O=L7 zJt>!;;JW5nuIZR1G_{^2l=)$-q(u zx+03L4SD$up$_nCU{C z$?xA&_Ml+0Q6vhA6MnT(=6^5h!_27Ol5JEkg-Q-QiG;FL&cGIG{n;y#``@5n*+4Y?&3jJ>L_Zm2kIE-yO*P!7B)mzYj(w7mz?25NWHC zH5IPeDQq8^PLdP0@_c9Gi7DgdG8YsEwm2f^S-Wk`9~2E=rIuPMCvz!)=QCI}K5h*HOb*d^1B)$KjXzD^c0+vHs9n*C?tNbof=BV%H zzv`AXl4BF!7T`IN>|;H<)S06^cRiyF$nED2gNA>LSG>Q1FUgOuqGro7M8%G{kuf`6 zxX~9^)yWp0v5fN#7Q^ihBHJw0T%$jd>Cf44RIW0Z9~nexUbVU}Y*v>qrJO!&`;xIt zt*N4?^>O(NgY2+YnZdJcF)5zpsRpDIZm*4HUGAvf#v?0J-^UTe%h8Ht#2N0<&{N0g zN0exy#c+;+x-=D<3tET$31z21|9e)2y1du8nm*_|XRInTd)l>neDVbT+vFOuHl$!O zzQXi1_3uk@b>XfpteY+SV*ySL6cCD{E6sSail@p^rA=%;jamJS!veFjoq$$u-P8>Y zaCzok!@Czk8mQ@snBRyDJPF$GtcBcZnig(?skhNR^xcFpinxox7i+{rGcN=g8}>Q$ zt&`08zJ8C{T|?-{-~tV8hC+$7LieYDMmv~8@q8} z+;@?C2TIB|6J+<$a|Ji2@q?o|Xy-FfSF03ju(WRU1R&4+w}(1%5ff3C_#EF zsGE)Xu8dG0wClN9QLsG|71kt*Ww#o234d@j4uW`yw5LiU*G*fpC(SEd0A?g#qUXJu z?uLy}mCa}T%4Xj&G5RP6-|_+Af-9nrC0J8kC0~4${KVAJynG8-1wiAS*UEk0w^Z@o 
z;62AKxq;C&#gzS=IZMfrXsOR(wVJ;q%cJj&v#lLnsw+k)jX+i~&*AhA_a7Y1kFxCl zH;(47BKc1o&5xq(kMUDX{{v6_V2bY9KEnyVp~cO&rcKfRUy41{e<}k04N-I~CoT$1 z2ittBKqgB3N)~`K5Kiu}B<-+g(?l6DpwtsS%zwz2XJIwW7 z#qkGFU%k3+?|`l&Iljwo+uG{b7l&{4&l1-YNjtH(`YU4C;l&??qm=y!g-|(}B}u`^ zwHYP9HR&EJ_DcHtSbCU8B4bQri%p&&SBG$MWMhyy;FjG>oCTDF1$#=1NJX44sf)0X zAHgqF@;QRRYu|OvgskyS3XKBpgOKov`T4yZ6dOtI1iX_}lBx}8w%^r@hRTH5X$CPb z4->cDYOY&E(?KK2-(-rp^GOqF8O~p1YYn*0S+AO-2lk#*T!{TlE<&)4u~`-G6CSW? zlTC0#o^c6khz4|c|(6`!s6oV(o4k> zF%4Fl`YY09CyHc3GT&D%;XZ87cpHD;w1yHElzk`MLAtzm7=%wrcR_i1crI zAb#z{__@)8;6JV9k0tv@TjjrL^Z0cO{V~JAug6n;}d)$ui_27bSydwd^Q*<5oP2`5*R5Z z3nREL6M95u`n(T*vw|8~rW{gtPJmA=7$L4e1U^0~DHyodJE*lN42R{S!Ib%Dd+u$P zQwD<(o7CM`?%kp9480rak309Zwgeyn(<%2>{vtLXnTo++&)oq4x zy0343mQ?!b0AH}A`T^UbH71ou9Si_KyCpz0NmaKx?O`ydN#$^XiVE?1!?RZsJYu4| zn)VYAs2}}FcEwPyQ8Z-YktdH@a7v$xVnX0rZm3>(XK$Xd7>T(3Z0ea>e!w$NrA&Q6 zrP0Y=$ANVfayb%^57vv(Q~b`+Gn+5+D9o!2?)4M>(IDhN-*N>%pxeYs0E^hb9nmolP7kooa9o|pQ;p8SNZO5`mVgQ}ls;kgYhrI|dMz$8+%2zrxQ>6x>aYlSKse#6zdTz_{ToICw*I<05TkgQ!^cd!}IfQ zI2O+UG_WeK;J0P3;6VE!c=;in8LEw4crpW3k5!M{D=zJUB8s~rc3?m`E_D=mj=mq# zBks#=nH&M1`FMqPB-Iwn>;NawVE2@-WmP(BYGM)i{mP9+Ke1@tALo^Fd|;Xym}#Gw z=?@}#XABJBltq;U!g-I+?4%3NQlViSu?7>WbKiU;4jYk1!78Cx9FGGRVh$EC6=xD; zAdh>J7}F7=-u8`pQAz;>xV|&VVF90jq7O$4!N?2Z%73@?i%-Ik6V+WRf@bKbE#CC#%>kpb}#!mT(O&mVq1K{%Q7Rp3^dVIeV3bg?`v(Yb9Avc?-0q! zq)w!HqxN7fG`Kc*)SvzFe)L)SfyG&X9H+!FgRo(o&R;c;K>dqQ>&z&GuRNG(s(0^r zIAI}8y;7}=#Rk=u9&Si_b*W#fl*W!qL#=LbGf1Rew-a4?H;^87>~WE7#KHyh0C8P! 
zZf?MHBSAg(vdWd8z+sc4v|rzcNJBY1c-R$+Hqk(X_ZQWx3=_+EQOaOT8Hi%%H?aCt z;*9edwXtT8E6P-vPk8Squ_G=;k)CVI8O-qeC2H;-rm|yLvKXe*EjXttqcEI$;1?B= zbXWpxVOg|i>lb@SIt;pE#bOYGwqYhFHZOdAcfZlhO9eLQZ*YTzf}_JRaQHI>F-j%c z2%PW|e1w0VZuPgq_)0cUqLpoL8Mr7IxrpC=#d#X^BpE?~xcQFGx8%dr)Hg1e(g${e zbvGZCgkd*Hbx9u|MCPNGH)sE2TLNbgA@*sXoeDg0j zIE$h%XZHOJt^z#T2$F|Obvr_=kzM^<-RB>3!ez-i<0()p#x2^kQz46c09oF#FU=(r z#bCr?){lKemYVTzef>gmCJfK0F7H_=Qx40z3p8O@qcLe`ch;vdoAO0uu?mVfD%X@T zHKkbevXHl*J|?!sTPZ&=j#P4^$qRe+TkBwSFh{=Z7#yUhCS{NZV#+sS*__sXT?FXf z(FPj{;pRBDr&?*=tOG-ofd>vs*-+=}Zr~^9`KKmaV^(~;@~R}mlZ(T!jEaOS%@Hcz z4nBd?C2ZslN<$yA>)^dP@9mKorS|R^1rtR>5H>Uq%CH;G1YbN z5+uJyelxzIAg1W}7DTs0kDys~INbCMBR$5Qpy)KA#ES%(cD2ae`w;mq;KiVK-J#}U z=;|swK@n@f4Q5THsp4QY&Uqzd$sQ^65iYHMB!_1GhRDl5@2hd2Y)nP42`}^u6A`r} zV8{EIut(Z@VcY1>@rpDV-g3OJJJQC z8yW{%I-ho`^9qr&4d_{rKItCmrYTK=E7Faj8AGPVU$e=D0C$uML>#^ofZw=f&ig`e z7FT@fx@tJ3OI`kCGC@t^0!81cicnlz$c&;UWFBZ_y%3Pv3J|c}+y7+Ct5$W=4~Bra=G_}!$p&L5QLM@;>EMQ& z<(^_U#1ad!XP;{ zW&;azGjq*qWP~;q@RldMFjl;0q(QNY4D5=8@`!t*zvGrfwUS$Ih4RJJLiNdyQqm0X z8gSZ$^3!(^aQak^U#*YNKW0_#Q}u4(C@(Bxi8UFZe4@~+Ej7ukY!}7=)_;CSFeEF` zTA**F?W;kwYHVO5j~+sCos*U6TS-@Lbb%{I$7NHl-c=Ky1jUIzap15ANpupnxVPyk zLU_rpXZs{HX4ITfv99hy6knr%p8GAayHKGMax~w;S0DUV?9!SuS5Bn4ekDG0S51{3 zxHsD9;8+m-dC8{eQTo6>ru-x`Iajr@lwF1)JBAqXS`+YGTK;MnwHsnjaN;u*ewCQ$ zK%(CW1T02WNQk54``%#+z7zAbRNsL@34AKe0##VVjJc$8QSnbIZJz*$G=b{@1aH^R z+7sPLo`Gn265}U5@e=V^hs!N7S7=y;Cg1r(K!R7&T|z)q-qQABaqs)zZc=5uuFJRk z!{Bha-kq&?{^R7n6|d{@1^CYdjG3f8_52%U|9W3rNYvqY`G% z{z!=*^kJZl1M|~oE52T=2x+O_-i@&QdM+%zrAU4_D}HIrdkY5=Jlx=sz1P=WtP?iJ zsFOx{4SBb^lM;;U6xJJ4w|7iEoS?7yp4?Dpc@o?@S1c?HI0nokL$1XQz6riL4Zfhy z)^kNd#Bg{#@iSs%9i7>(xpcCS!F{I^s6B#kC_Z2BX#rjGTe^v8!x7uFc_>j`k#Q+K zL&7W9q{w>UwOl`gf!w3Q1kCYPtAqLT!Eqt_!2`)b!U2KE82WwJEQJL`0FwUP*PAfY z68QlP5awm zkiNe3APSyKbhB8VfVHf!Cf~lXHcElEz-Dpeox6A`e0U-hk{=^A4+nz-T4x#E){cku z>>fT5)dTa1nv_mXgaOyY+`tb&-5A^zxEHiT)z^d0)6oVJ=w|f+;?Bm8Cqw|Tp+f^C z>-Xg=kC#O2ke?+&1H{CWYhMJX69dWQ#hn=8Td^{gmm|ZfBe@h~8zNeb!f7{IW3=Q6 zs7vrgCVZeICJ$LCNW|ae@oHAgfMVD{0b64 
zggb7%^q@j!)V{10UR-IAAOzAfvxmscQZuBJ?t4jHs^e55Sf?4rt0WJ0bn4^qiaqEhn$iK z*ot`~z(vR51t%Rsq#Z?O@WME}_e?tbvZEQ$F&Fc-A2KTxhPp_8-&DP(e2o4`nCS4Kc|It&H?;hAWVa5iZ};uh7Xrd~+a z@5eely}g0Ri}r(NmD9OXZ|zs|#wu|&u{G;iM03cpi~UwMa_eU_$np^fqrqf)w(P`N z&NpAAb1uq!_I=^L>T}3MAAxQ`IX))#mw5=%kxe$(dnIobEYv&XW0BC3VS;oBTwkU? z3Y@QvRNHiMh=F`ShL$Wt1XC@W^x2~>=F;%X z_z7p|MdBueJukKp?);z^Kc90^^`|<4Dc)=dS`@r*;vei`-_u*0wjiJrmh%;Sspe~8 zOhW#OHg;!~-b-Tn&=}BSrtQ5D%BRW?wPrbY1M7C%>GjlAdjG=8+v2yp^w<1prDbmYOn-`wkvbP7UA~d(Qp&|%N0;Dovy8&fjFOVtmr6*dW z#wu}-7?ThM#Uj4MkLEF_g>|0b-qz@qO}H~6RcP^?poV1;qU=9odGW{%liG*q@t6-g zo`)BhiUk)8xFSkTCpj~xWh*7!(=ohL*5sO->&rqoQ|#eGM1zQ)U`Za{I*u5mk};fV zuN4IY;81^5xVs<(A%Lp}{fZXDj8Hz!?|uk*FF4)`e(=bdglAYE3RIbI_`vKqEsFaKY;oIS$+k>+xHRG4)dqMg4Es)Kd3-x*y^MOee zSiT95cEAru!}pR#ZsTXSMYkypb(_ccCA;VjIQI%ulMV079!=4a6XRAvBvEa`fH7DT za(gb8vot6<59-(U9^Bt;iZ+I}IDB&3S4rw}%=&zNI_YOlQxAJ?YuV?Ivbdru9-l0$ z0_Q}Oesk|W%mnOUNaskVr_dwRBVc&)c+R`j+0neCJ}#@hqo?v<{K{sX-{7a&Pz$F_v}HWBrkKrz^RPH|t1U*B8K^RJf@D z^EWmeOH1E}cvMT;(xi*(8CZv{c89~iP`ub{hf_g8k+7H=D%x9{SQ}aIKYCsGogtkj zK48fkAnsE3(E&xVJRZmukhI&R1d1m4<0*hs#6ynOjkO z=B!L4S-ELwHjdY8HCk+`!O9^ipLWGgS+F^%weoc_l|XV&t6eys0hF+QsZ3ZvxTMkL zUQmgb@8i-^#6HuMchcGq?U}-iqZnaklKq^KM-^7=f+78cdlBw+reN_UN1YLU-1g90 zOh!M3W^b_3$wmD-D>}&^9s%K0m->P&WYy>sp+hcL$ttF^&AoO=#q}6tQ34L(Ji-aN3s_aJ$`j5Rt)x`2j~pb}O@hhvu78k@{? 
zo!Zot81QcjpLvPg;Cxuf&97PLQXV$iuL@p_ z`~7|Xcn&WZ&OYbt6+6y*)?Rz9GF8G`3>VpZTrCvq7vxn%X%#klQ`;O#T?rD zjjc{^JAY}id`n&T%~!hw3T00)(cj#amvT6R=7?}~P5B6nNPi9#ZUL#FGBg6urDrN= zq|InCjD-exSaHs)#DN&z&2X(YMXZYVyWNT?+yTMl1F&=G_%q^Nr){{#R%bE%=mq8?MRN`U*r)!&V(_TFe$l*3f#J z5##$@=xk2XFlScAsR+4)fh$$;?LcZ&aUtZ*0B3+9i{aYq*Qd8ucHQ{nnf)%mcb@U& zs?!mles-}1UD9bUV$RtRmIu9(7|@iSnR8{PAbqdMEX#aiZ*dLO*x`l`_cdG2!ZuQM z30l+X5O^B${D{&vk%=7C_2(`mUg6|6jhzCPqw{Cxa8jQaxtV_RVt zx3&wRLZC!-)2&y$N_2DtH&q|!B%xz+CUx4&TL~}=r*QOtYAG`m>(!u=hs40Ph0sDa zI$zOY1n5DM$%&R4?K$Lyrwb+?K7@4W@y0qC4jIve^RYD07*GiC^cEvEc#}+*~7wzHA z+;5)f$?Q4Z^MH@6xtFRi?r^5TZQQ-Rv^$zbW=z4dZub+Q32O@Rg^By(*;L)S+fkqI z=AA~s)2KufGq}d=>3WS^J#0t;7V#4!*RfgvK~QisY=2>2%os@C<2E?5Q9d|`1*a;v zD#=pnrW#1}HKw6bMKtH;_etAnNq49k(;LbZ0u%RV=&K!)tcBU_6=x9P|<%UAS zu1Li%K`S$4eEr;YUisZb&=~!Gvneep>cNgb zS30_Z<{R?5yRZ0>J;nH|xrF3pb~)Us8h&o@ZB)B|0D=(%I?RabE6*>~zZineh)BTq!K z7~C}-EkBd1lZ}^qa5vmzRgcwhFZRETNBs1IUQK3u=3^_ZLe|Of0t8s>R?Sy4=bR_1s(FX14EjSB6U?HYgm`f| zt@IjFfJew*slOEBFYF}DNIMDg=Xa}SvYY2^R^;@ap-^Ab6REg%&EVNZ49pKWk$0M2 z$6dnUJHqFoe?+ZCrT;agJxA()YtLc*YR_T)%ITq##0l=YU+g(gy0NZd`OwB_iFfFB zqMoYM!h7zi7kd7M*0Yd%w`9)U7L#F?KlPX>Yfu?43!jkWEWU+uTw6$ut_WV}T?5Yh zj|Lv$**$(FVraUpAy0`k>DbO49Oz>$McThj-d)^ZEQ6IS4SL1$?+@C?K3N~;dG5yc zIgt=IEmQ6OjVKW zgfOixEU2FG86!ANb=Ky(yV2Mq~OMBr-~B`S$ZhwEcf4P| zl$-sabe%>sJ5|ept}geEXT`O80?XT{JL=6maN4Mh9wMp&ZVLqv zrASMh9D9#$oX?2}Uwi#hiymby^7&ZJ*>T2G9+{PRwYlY= zl&{O3Tg1C1Eo6(06KhnSi1xrVPVL1Ss*3#9^e4;(tIx5V4Gb#|ugu|RDhS2AU9eu} zj;3EP9h$_N{TBBfmhh7MGeQgK!%)EDTkc5y}Ntnbp@WrT?Or~oTm$J@UHPdi&fpXezuxCH9s#{lw@8B=(}F>&e$$5woX>3I^Ie`8 zs}K*VydY)H3nD4Fj~~5NA9GP=--0>buXxn5SRX(r&vQWY-Z}Ct8}6eRImQnLpLwAY z`+KZDx5F0mf;!{EX8oK(`!3*wmOs8`Vl9YW40!!&QK|$i^33;c`Hedl1qR-#ns(QQ z??2;^van2?S<&w=+zNoZWd&>d;H4cv@-1D{Xn4vJAFEk8S4h=i`^7mv$uzhHkSBQG z{Yw3n#3}zfNjkDG)T*BQkYKM=#k^!78uYQP4^`+8VmcFEb&g*^j0t+L$rKm6)9kd* zqiz&8w`k(3fqnB2m%Lfmo;eKCNLgUfVwU z`n;2{OEJrEDi+A<$HOEAn7^jPV$Jty{~65k)VBKh0j@W!U8U&L^uiXR8!%kS@t`i0 
zdzlyFGkl0OE;Bq$e{y$PhxFZPEWA&zZ(nQ>5Q3e%^d`8%g){HHrd+eXyseoT*O+{Kn-Ey#*OEincvHFF0BF@mU1%a$wPsnC&dKd3M0-7A zn`06TnY@Gk`k9{}56`K9iPLgnSr4vQRrWp{VkjD*@(58gDT%xE8Q0p?BjbS(+tpen zl=sX}XixR4jTb;38Ju3*y3_sCSll?B658J9)0mf+9m(c!dTrniv^i{Pesn?iqU-!w zpfDZX!BSaHDvCA+c$r6i{sxYEkIxeQt-wt*(&=uQxQs)T_>}SyCIk?y57` z3$YzZA0JYN@q*Gzfx~r<6&IoGDXF9C^v|ut5%^^->pSxoC`RcI=vQ&^oiATTE|CSP zfSVbKOGgaU^lHxEm>-^@_C<@foq7+)R<^mle3z&1X~__?i=>fDj6<&&Az`4C24ig> zCpKUx3h<7*l`8&0WuEk%1?Fcd*SP1HAPc;L8X6#WxZxQAy@kQyg5vAvrB$6pyCLx( zH9gWy-8V08Wf^LBgpVQJT-Tj?S!f|xKq8U#a@P*+Wk1DlQ&B~BV^XJ z5uqxH9bLc+sXZexUlvl$LT8NjX)C(hn^VtWgu)cHny$~NJ!PfB->=oO%#Gcs5Tcd? z+pgi!QC+p(70^z9D!AEAx9n50s*`4DSiZohsw_mX!|&9y^rkuxMR4tOBK$+~smJ#W zn}eiH(8r=7_PbtlYW0JAO~#^Vk;H~1@`@yO@V+qsK}~6qoT@6HZkdN+Dx@xU-#LAM zt*(Ud?K#5e4Lc(f7KF--qQgEv=&m}Vq<4#*YF}Dno%J9QA#Kds0puJKyD|%P<{ybj z+!dvg0GrLJC%SA>!5*jUNAm5aj2n4D_rerV$0We@RXOTWg6{OBZKzn+wPd zynV5wh;E%XuVXH&Eg>daSmN7RAm2+0+h(P3TV2rLoS&vh3GEoPROq8G11c32&Vrl~ z3{{$GvFU3be)k2<-3q6x`Co7)hk&?4x&qDRpB8SS-_QRLz$Pba)FvT zITDUHc-!pja0{|n?!udi&B`aLQ>a1_WO7A0l(qs(7Bh>f+*72bHVe_?%MNok(B@{c z!YQ=MC(LGJ^BXTbX8`QGF0*x-y*qa|#-uQ43w#&{P64m8>*R;B;u5BD+-jD}uy;~l zTUh_FrU7F00roNNu&KN!H1Vf1HUkyY+sF~{a?E$q7}gcNc%;!B+0L%e0thUw2?bk1 zny$WiYZ{i!Yq~mTI)C$}WcYd2G~M@&GNlKV{r9MCUIvO4AhPIfGW%~gp04dH0lz*- zYP#gb>Wch!%W*s2Rm5>=gh8~h7`H}MQaLMyFIyG4q3eMlV0U8aCfJPDjkKsqidDY0 zOWA+=N%y??0i`-?nwX%m}A%q$+o3Q9$F)!t-)h zEB^Z!Gm-?vgRTYk_1LdKF#o4O_o4{x(8vr>GjmbmR-f}UhmyT+**jU>NaOf&VtCX+ zY1gOncMFdNgZn>>v;;erO>u_NTM`FC54t>k&kVBGMl4|DUyU_pYg*nVSxLxXS2qla zGq4IxWw?Ux>w(F^f2nBiZe7-UU4jAtv4B}2l*E)!whE@q<2W9r*j{JbHlSRoD=!n7)x_KeRi!R&=Z6UPg^>N^O}Zn1z5 zAZe#_@-5UU3~YvxK)Ff6rMZfnwxT4fWW%;|vMsC0c?ziIwR$l1NC8hA zW7ZWLYn>jZSr(jgnH#1n3c6fuH=?*59u~&qW!B6@Vv^tXNz83}Sryp4)8WHUfy!XA zO0Yu}jj@r;HP}c!T4CsA5y>`}=*0d(%QuTs%eV0F>_hBnj@9ZezlX2d4 zR?acH8@vg(w&vq?#cRn`^BN^W1aw{6{GRMR4u(p}JC9_JgwiW60)T5ba^T*f1Y6bC z`=9Qer=6d=jP5IQLq>n0$if(z;XuZ>p;X5;c5Cv)K!qE3 z!t7J|GBJ|2yZJV$sml_E#k3fz0}s=|Mr8Yfu!%2gh8%)x z0j{fQu`xNHpPsu(b 
zs$xJeC~5KF*4|DWQ55FojC^)OQv66xjqEO1Ay_l7l6@|3llzPxCzzMBR!ll~VyhXd z!MKZU!EN_OtsHCLryW+vyBmx@0%0q%h`<_R<1cz>&Y@jmC_ZX?d}~W z3;9f~Sj^lT&rEaaL>i?Ni(n0RdjWI`sVc=#SD8&bfOJmi1?Xs_l-zC=pm&spOKSIob`P{(X4;dngO-Wy!jd zcBGRtP#^$aQ5bBGFHjNhgOfgJ)d8ZBQ%N8tPf9}i5sa8Z@HLlb^w0H6d7CC zFwC@2Ol`wFfj{iyd$$q8U2)`UYIGZ`<9*2V75-TL_18yE_5`me&u&5K(rXXCd{h7G z<4g6^2Ap4Ql>QzQ<@?5k6XU8Krq}sTDRu~cvS3(g0+5MtY)k-OgVFubjrefuPR`uH zV|Sz*aSJj_joL5k)79l>X4mH6qSxWn;(GrXfslRxGB#+ zlEJ^}I^zg(ii$MaZC@)A?9S8bcz2gb{(^kGkD;}(_tj62wpY+zu3{Tu-n~Y80-zxNXBl1vVAEpUxHwmVk^c>UJx4D**>vqHZgFtg0hv-j$++{-I8UZ)*eM zWv~H)-4T5D<2{RG*opq64)$A{W1`q({FWUCmq@ubw-ME!_(#&0tHcGl1NnqI+jYID z$$DL_M|(^LHmPB~g6)Rn{CzjFRs@-Z~FXHkBRjSX`Qa6@R_24hc=p`c1DJG z?3J|zLdliW5CND?sR}Tb<<%k~O1BS8f@+5J1=fYPeeUN7SC0gO+Bak=T-@DdD_vrt z*nXS$Dr0+vlGvESZ~Lw5t`>nzI1qEW+dELQ(c%#6;`=fI{8qOvgN+DO!ZW-rtCPxA z6M8sVSd?dpn{>Q(sC;87T!dqT;KWVPM+W$E9R%K z?w(W~lpo{F!-LqK%8x6qz^0VB8w-DW!`ZxO&p?KtiGG#(327Kvs6%4|}<+OryAn&73L(xFm#Q?pFnl$I3fVl@6 z!7z=acMXq0N?~EQp{seT61nvDyCFLt9Cs%L4S>Fj@6&Q^dvXUpUd5;Fl0j@vM|V$l zT3YehrK!>?>W#^mx*3Q1HL4{vbUb#=ZDCo%n+$u3iWWK1?u;Y`DrREu1Xj{xAUxXqJ5bTlwU%&Xy!M zV;?tM&za7Ooy@E20)1@~79M#3D!4%{%>_M$Bz5}ouAKrZ_}0c*aPG(GVcAeNT-Av| z=!4bn&Or-URtQ>NJkeCPmdjkE2F94p?z(XW>D?vVsAg_yH(t(!-Mcq7=0pZ8=v>y6 z?MC2Qi|+%B2q*9{k)T>%y+g9yAv-9$1}R3~e&K)J0!7L`hC)21P_<79?}MuJ>LW|+I>TeGZuMchN!GT`ZlHtV+Wu8!4{@MexEDaGX7 z9dxvdEA{;_u9v%G#z0@Y*!B1#L^;Cb&5)~dKoM_3D9{68G?9)7`=oFS-^gRVHfU?J zbCLVf#y~5pVoD34F+X%uA~<%YT@`}4R8o)#8mW?`jnuOMPZqAugWwzUd+XMkmOSac z6!)N;bkglsm1SFlb_+)3SE<|jV%f3%@@jYX_gxlNq!tFJ$dfssB)n?d?t!u%Geuoq z-JHNRVByMm^ucm9_1a!7lpH!aEu#w(7@yB5L-0kXGx?)4I# z-(oMPfB+w>dO_@pP4bXtoVriVmOn&lkcSHkxvKEbQ5v&gV26yIa_k@s^R}h_R$`vD zb_xS|Quu1Y4(2GevGNfarT6ioN_wV;Rb39xrm=?TN=Q zz6qcN_ry#8Duip>7JR9wkcp}<>R*?CU+@_Y7dcAk*;;LOX@d#uP zlK*%-^KCmu@pq4_JJ{y`tfS#B)7tR+(yAZmP!zKACBLG%e^p}7y=+yB)ULYeKQ#=K zz@^CkbsNfXOn%81c|SJ>0jmx>L8gz(!r@xk;nAlhdZ-+PAl&@0xlu~n;Tqp^9Wmqz zVz!SU9>&Z0ud+_`Iy<{O~3eaB7z@C z^Pl*{|NlwAc=GnUpVLWLX_{&1S!$YT8(L}q7q~1G`0?K1t~<0uROCwN@jRKw>(t?9 
z;$qR$)1hb4)k3CrWaVa~*VNV1rq|KcV$}9l zRU2w*JLgnlP|HfT$B)eO_o|?z<5*qEy^8n|GKx zwl93F>h@}sC#Pueqhu{40_&fWPd|!mFc+@gVyIs+@6;&!mKHn7Y0 zktEc#lJ~9VB`nXS^W-kGiCd9i7Lx#VTTivS=G$p)0mTE;ew;%}s%;kLaaM5!IR#ca zj^j#GSrC5`UIG(o@P<={PqBzQ$II9e0BAEOK+QgzNPM!^-f?GUX1J9eBH-Xa(J}?v zuL~lKa*b@c*0TdSlVh<{NjfqH-kSkP+e!z}bK4&v%A`X%mS9eM{<{H!#Hpag?AXwj z*Mb8Cu6gv%d+6Xo+j11bz2PWAjUc7`@$xn(bpT+0xylX|=DfX>9*w9S_e2{d1^^MY z1&aKu!yrYlzJt+RUB;%%r1$Q@fK{a=bO}724pwK|kJqUUPpJx^6~PzL?pjwpE5d{=C7 zwK(4$C@9A_9%Hhxocjghx-+rJA4a~^*_tSwzO1Dar^$iT?JJlHG;@b@`n8)9Rn4>^lo zzv`%A-YX2-Z>hH061?QPI*kqB2bAwhW81n8Q)8ve9(1GqJtHmBAd;bdxaUK>7@>Zx<(K=~EWoz}pJhxJB(c>CU5b z3I5P=C}nqoPqni~GE?|G@q-NF=|%#dB9ODD^)=r2bmNxB>5JkzZ(ZhMfNKo>;N5xQ zdBcZ^b6I6jmyt@-wv_u>N!tw-#pSBj&&xs9%n?h(4-#@*dTq`mfu0W-I5LG1HEBdW zjMgeh$Ye};x9af_mT*6bJKj0o+DfhIb|Ah|hxD~}bL)I~Q+u!d zV;n({ZSVM^$wWevj+vq+A92kK^WNl5IAxgHge6F>ZoFY^EZ!(V`6^@T#i7z{j!)g7 z(U+U`717?7R$jhSJaQL{EGdCSNtp#j+(=}iR-&97(t!IosW_#makYq{x?SlyYoO`P z)-0M=1&Y-A#WoU>!K=+%5V`GmU%QR?)+|qM-DLIyn|FPN8vv=5IZdJf{!+Tsvt?fW#J3qTH~r3H7cjnk zsueg}jiZ|CX7vKQ6KAjOT;ACw{*1uk8%rR^x6s-lD1i+b&xiy6i0->ivHpl4`ZHz&d1IjCDE{if2pJGGCT$4xk=O>`l0A05+2MGE##H-2(>d+;P;JU|_9 zsF!Sgh+v2N7=;fZX%cWQF3-^o` z#0N5pJZVlb_TP8!Qyp{_D-@V6TbuA}%~3Ju-}LWke%$|POv3b?kuftkpBs^3yyE66 zW=e0SQp}>tlhd@Z;B5O*G}iNDjrYK%6mnLitrzE#76;X)pTJn3q)AL9^DA!PiF;)2 z=|Fbuke3z&9(SD9SL@7RPG`$vb>wGe=LjJs^_`7*;F01u$$s_8CX5vGd?P8T+$Y%QHZz&++Ge7-Ds+XM#E~R zogI=n#ygkFHlIZ=R>N$>FM&;LC+^oaOsw?n$fE~K&_7&HZzdcShb?awU;qZQZ?ENx zTym4|=*Tyw(U{G^4!?p6URYEFO%?cM>TP+XY2NInja&j)WHpH8L(r?ox#!jIC=2Q8Bd-@osC zDQC3o|EKE1)3nh!wIfQt+uw6sr9Y}dL9xHyhwKf=68j4dE|;+Ff*ZGGuCuC#*L|OO zImr@mlAc%Hxq-}Fj61`gWX*$ofrS_A!>niqkp^y+URL=}n|_Mvg+-6T=z^)!A8b;~ z?$6Mwv+~GaZPa+I8Uevyt&}#eX1)xW+fXiv$0=^B-+SJVf_v7oWGG-EcfoJYhUc=o*0bA>P0TNA zoyWN;L=fqXs`?6*qU#=x+gYaTS8=PW1z+<>U|hv@S?aUo>}xsAjyAtm-_EzR<_uhN zfo(a!rWUd%@&o4Q(QlMLU2Yrp&meIbUPtEV>p7Fc&q1H(;Dx5jp%FIc0WC5OE@ zkGGF>(9zg#t6U{^yT@catltd2%vg_7#0GeNq0r{~>O^pPe}%r(`uty;awwRKcpcR_)*HCU9*TRsR-U~T;Qel2<| 
z+9r9~Zhh>*Ym>s+r~sfK=K&&NxJ0sCA8c(u8A-_+xi&`KXQpmv+ZZqHHoFBps9b;< zAbQg`GX#ofipUX^Ye80S@A%e%OPr*uX z*>eyh`&)a?$hojqaIlMXFkgF-0)pxJqBqfXf7!V{joNkCPvbz{c74w-%2s(iKLti; z8WmxwW^N_BJl_Fy8hsrUIVms6iiUk3o9vZo$wpl_W`TfxoKxkzCqwduceTJQ;>bbs2JrHjp zU*SX}jNd%_MRY)u2~im@p;Xz;)oSgW0Fdd(UQoFyWQez9eOur5UO1rF;r@7$+a3qB z_@ePZTlYZQP{=Flw<|h)ZJ`oo^S;);E~B*tj}_dU}BKVv}BP`DZ}o&XCV{ zG;VPBkV7!Abe$@a9fvxC9wZRp5A62Is;VkV&5;Xiel7sD;*oWQ+f*(M6*7aE`PsUm zL3Nv$>o;@ud!s+9>S;?e0L8F$QFqWfR^S&aXWOG6)96^mM>iU!?Yzy7uK;B(x!W}F zjLe$i0ztXy+Ybq2Si7$d4B`38Mc8qivt!6zGEUFK1`-J5qc4OE_XzDiWvhd59*|;U zF3m6D9FT_#;#H)OJLX({gjo1EIDK$ zgWR7UV@Wc!!2_Au;Yxmk)2whGfzYZ)W@){3+Ov?2z|G0kPF1DGb?fUYf=lbO7ol4s zQwla8jU~CtD1i(Ulejt;Cp`MJnn|Nb1MCWHQT6RGL@3fo#noSZpzJ1TtT6-3nvtq& zX(+|e5)_%=vlz)f7fheZO*etK_NrQ)kCw>S8rCaEYfw+Ck!HYGnp4p_qH7zaTU^<$ z{@B^YJK)@1r}?5BcbT|d(4?}rc&ba<=iP9x(B8q28G( zjb4M5d0SFx_|d&MWgA6c0(4-FXx|I*8-oY_-2_?1yK*_OHay+|Jb%oL`H9@@PnqfR zO$u%T0dnbG!2TDa;Wq}oW;aWU26G(m_iIm>8^GfgC&m3Tlkd4~6zS7KXQS6%NRH&2 z>(OuQai}(zvF0<95T}=!8>clW(=Db!4k8{R5+Nz6)#dC_<>5uT>1H!)===m&7*KU+ zo&x3e=+MqMqug6aYT8XV3&}wt#UY$L%+YH0l)}3MuC9dyU?pO+OId`<+RcM$S>UKV zj?&$lenmM)!32NH>@BfnwJo2=&*H=p=CR5+^QH=HBvMpyZioHvJ z`?2D0Bo05H$ul2+=IBKfln+Cb-^d(ZSS~IoY5OOs^_|LXMb_hY|LSbGGbioaV<+F! 
z*w=O20Z2LTCcKk0FOErgZl=Nq8U7E_G5SkJp(?Z{@JLv`?$FFtg923 zP9X5DDDjo)7ed3GY{=#sD%_JK_;v{eWr$>m*bsa3a1Li+fBaq|*fLD(cEpaonVZv_ zZ|}S5fGzBsJG-*pa-|G>`}M&u!ES(J>q_hN0oKRL1&oT}GrzI}_l;QorqlMNtT^1;z`7lP z2t~iB?dJZMV2nRDT^zN+vi%lL{{PMn`&EcT7lYnTfIt9plwCyHO#dgCn1!DCFoM~i z-3(638heO|ZAj6U6XSUsh=!j++8SPoKd-scHyDs_8`okJ8+3)O|17TCMB$KHfqOn0 zl$PrPs@rv0yi}bFK0fy^6KH?F)9O)WAT(xup$0xj9=rDj&U<$Y4GD=C`z)urTlQJJ z<~JJJU6yv8!9XWL$X4`Hj&7qV@lYU!yZJR|J9ra05K+^cQrSJNOJ{=p0_wuD^EimO zY_WK*Mj@}+#M^Yn-=;QyAWN5nV|O^DxA&>jPT!IS9;P>y>3PvgKp8xXF^?pzCY!>z zv~Z%jcep5n&vA2Z)&{Y%;Izau2cN5YVYk1~&$?#gh_ssm+&0%U0&Jxqj1dk-kVQgY z3MX6g!lk4$ZrI)GRx3e-EzJiH#o-Uc;oHIx*U89xv@fO0rUv|o73p?Ywgi?v1rN60 z)~dK6vo|rSzT1b2&w)XU#*5SW+Ukj+dNbjnE}Z>d4tY`e7IsV3w8&3)iF~*vR5wf87l}Thblb5 zZz1ui2d`qjt!=6VrHqR==&RXuKgrrL3zV71Dj;~BZ}cfuuoz?40gQ?&WESw=zD3TK zg;k+NYSKrxsC^`Z7}abZk|1-&{fs+*(#8xZ8kB|1(P4tz0Duez95ze*fZFnm`*ySl zRacONoEz_+i<2~{s@Rij0it%$Z0XhYtGgi)Ih!Kw6i+n40(FJ z0F=lSGDqCBk%*S zJVRtdy*SuL%}&Y{P6U0}x!wH%k(URN2snSa*JD$HjwAvA!t><-Zf^=NXzGF-4xWrO zqq~QC#jZ6AH%`snyN;!vr!`w+vhCRi5J&0CP0*^NA zp+$`O6Z4JP*|^yic`s5R%>dgIBNB5&#WTz6iZoNKoOrgJWd4jK{zD})sd(Z+k5jT! zEb1COu4`AX2C&u3n%X8;8$znY*A5AsjG?Z85pIvrVmIc@Y8Md*gjpP zrF&haG|$RrDrj&ar4m1CKkGx86>>9BeznTjHav=ca6#C20O4z0?G7dHC`1}gXQjHX z-rLq9su}ZY!4h>}bW)yuPEfX*CoWQa&8(2(Vw(cC#rRaoCPL8ETafsi0r9MTW4&2xTkjy7+Na2Dp*At_ zC=0v#w3`Csy4sY7h6!e|zE(+aFvWCQnC2YLKMeS>5N9la?r(lC2b(WKHGPA6>+M~i0gIDgL@ML!cp@)~wA}mt{Hq7zaqoCfe!Ca6KM!0Mm4M1&B-XQnGhzgrN9p zvcap?>FqWSP^B!UXy9z|D1U*iyxoWC)*Fx#163Dw3rqcVM@vU%K3*QOjeEftL+$8G z#h~~FL$>`B;`k~2DN-Qi{VdDXJjB`s^VJRYp(g24Io)IdyHe|(rmT5*XS-}lNt-v= zVnIP*WyY#t)*F&B1m5V~Znw5+$O1ZTpz;rKy1|fZLDg%toIL6@l5)@tnOpkF@-fn; zpw_It2D>sBxTy-K{!rL?9tNYrep|^Y-?BV0D=VJG!L2?U`2q}VY`|L`*mjdy=q4|@pWWEgiM(_J&`>x*JFE>*+D9V08xt|Zxj!H-k zadSU4EZ;!L^Zl8B{#k}?-=O-r5P8l;Zvr@~-4kv(sc5*z#Ox1`JsZo*={<#F@ItZ~ z>oe_h!7q3>DLW5YcI2>U{{!CjPW#eG=8jhlZJ!-zAgB%j|360t7T_XJg<8A}NU2T& zkD7HtC|#~a-|W`>;&~*#)resXe3;PgZS6)A)e^_z?K#rBMc%kvF!Y`thMbhvCEh^? 
zRBuV+iw7CMEvuIUI5y9TCi;$TvunF=t6m3^NKEWl)@QXjU*~+&=$8=WzP~@i>A$Iu zTVHYKx*$_={#t$4UGsH~yRcC0^869fdbgj#nCWNZTKWCe>N+PElNJ{{7Y9A7rVbN5 z8<(ygy(WhyGrbNA3o|Dd8@skHo8}km>7iTdpMl~DZnq1A-+>~DE{2T>5-6U}x#B^Y ztLkK`_~2Y|@66uad~V4*xvERGw(Xnwb2eOoOxi*y&tH3=?Hqo`RT*&p!Fqs)h@3&> zC8k^6J60MxHFWehDhDOWi!)ApcqVu zF~BaH$U1NdF%eIwE-=7dW;vU#Z#IaGUV+R^y1VG42NQJ1Cx*DJ&d+R@5Lb|MytEUr z-vtv-rkYQ72&(03_f{m(x~^3V(yNtdkKs?L7LAKz3oZlo8T&V*u~paWtssbPIA~Et zaj|q~)kWGFz6RlUU5Z8{Qy(Z;^Y0ezHm!GdaM(-s1vqZB0aZZ<$pGxs2-n&zkNuGX zq=V}mtNzg5m;G{x$+A17ZKr!iK@7Z2It*ep!6_ffu<*BCOz!zU5MfE z0r#zWKk0?uoB|gJ$i-=?7tB~-2kUI6HEXU_7*9;lDj#Dsh2NR79?UnFw1+-VbS+qB z(q&CtxLORTb{BS9!XPU&H5iPU32UqjczIAb)>aGQ8_>nJO(QC1B$nJ4HXo_7Q;cMH zLhz3uGzK?gWheO#0GkYE>aMj#<*R_SVu(5W*kq>W@44`aG1b0`@iL*8%-3 zaq8E~X!4yWjE41O+uK`xd{eLQ)!~ta3^ZBzj=V9N+}YXCL1r8N*4{@+n(IvHp&{dC>Px5`LygF_3Z&Y2`c=@7}Q4M$m4GifdC9sXd zBb)S%rlORe)b&O8tdBO^(!WaJoRF!Wmw0ZCjK4)c)Ki?hQNL+mr`pp;j2^BsReDKN zwv;*A8`j=B#K%;-ASkNHSGfGD=3WYJh+T5ct%gOW*R|M@m;KK!@De9KpDbMs7K#0W z5}6>AEYe7n_^AeZf6sy_zsjwIi4Jw4ggE8$l+v!z=Rv=_;|ZSaJz^ORHNF0uP&Ni0~FBQIJ5REJz$ zFG>!rwtBpEXQNHV^xhP^D8O{VX|yd0UKg{R{b64tYxcB)ZS=#9e&W#pc9B|C|M_hz zo!A)F+AQUTZ9bfU^ezoT4?%1|msVGbw5N*4xU1 zC0wO!2H=BrqnR-JLK!DvXBBIc4?7ZR`3=Sv1_0B2Sm+$Mu&|WCj#233SaD?zbD3dr0!h=V*TiF*uOpPL zO0&wi5wlmsks#4PT>N8U5^EkMK7S}LZNPNPw3dX3lYC=ReM@vp`Ull9S^{w+f! 
zQPmY2QQEySG(j}+${>Q0k7>Cs;`s~h`Q9=XENVt2pXfs&{(4k>4Es%(rW1qIJZ#7W zeODEuLdjuxWn@4SQc1tp_gx;x&)+l60kqrBU~&%Gw>ekSK$5gG6kOs+oYFX#5Vh({ zetvstp-7-NK1>XR1ruMP$E#_WQ*0pcYCHsqRQ=h*!is9#aPPbnFTtqu$}LGn{T$MIt5LyX z9ahSVuG9O3p=yF$T!qQ{UBk0Ylye-ISet&BA!*6Z&B18vp(?%Q8}4I8e*Es=-u=kW zWvM*S&@nV|JNBaHj z+duw5$&K%TbA#;&36w|LHs9(6D5lJQd3Km-Z&r{Cu{X{9t%7f#=0d3%cF2=q(&jCa zZ-xKu^K;2S-#@;(2*XvoPg`-!81PuO=a-dSoeRIuzA6=qB6iX-)NO-ZQ4%+uN`mS~s?1~b z>}SzMS@vR8WUAAGdSz;rEPC@zEN*BJX>@n>b`6H9+K$xexU%KYL4jiWtZMs%Gtt(H zW;cT*x8ge}>eCTJL*osBLvk^+{h`h4eyIj7mWG!|5YlhpQglk3g&*hK9*LhiO?Kw1 zYv<4D9WYS7`H);fS-5*Bt+l10xs`^srk05=awpU|lrx|IoqqCYjX&pVz&lps{hw;o zHqbRQ(=_~A^Wh$=f2Bq`=ygD2+n{?UNHSGO|q+saV}|53&t)O~W-;5a7wr(e|k@2>~{-k$eowI6oN zLhFUS=ST(9BWFX(V@*wj^!+|bsuNPCB1s)aM*2;!I!x0P-NPnJjU46|P*G6mj*)_6 zo=obynBQGUzKZ!}gq#ou=;#RfIWg4L?}|Mfa~{a$my?A1mK6#q(I4t>M=B3F7``7@ zj@94$tNOnt5kgj9@+*aX?H4~+fAd)NbjXFn6WKqk{#gE?|4xbN7$u*-q;x!W(0`{& ze~jw--%>q{bY-Y>n1tqcB!A2@8S?s~Pmx`H3t4o$531mttkUm&@cYitBYp6MTr*$$ z;8%)=?p62F-KXr31MD2K*B_rtuM_-%;`d(vBT*Pq%0tho6Gz&~9f#zYVh>MF>^R@k zBK=N}Cr?@5q&gf5anBWgLE+zeJSNp2-giF_2hw9wfqp^rNEW95F4l1p_*YI=-1p2+ zUwYs#XTh(U`}sBUZNf!CIh>PB{zT&c<-H>DU15if)cCigj>p(%LI&ACl0o@s-bJK- ze<}Qr;YTibFfEFN;9)mVP=x;B=qs>zjLZpjQpfzR-*3YIyB{6l=u0E~a!`J4gv0gS z@3S`YpW~XrLvr^2&c{!h`6ases+q(0@aaNMvK{0Up?Cdf&4m9gN55^W-*?ZieEd6$ zI&xegkFPCsiRLdgb230U%z<{|wDpztzjJ&arM*b|m$bk4j_-5l&%NUhul2(z`-{;3 z{5jq`3Xv`_|E+hN#K%v)BFA4c^fyrJs|F6+z|UC^X^v@tm+Nme z@Er{Pa&VkXkNp@u;D4g`Gdlc_r0*Uhjm7<6L_I?KUvTk1vLSGc4e$Rm8^`eDf8^mf zWIcskjr*&q;YS8aKcgm!?+hI(K^|!z`mNr-1jGHqck!?D%%SARb5wqvXAWNlanG~=j?nSgyT8v^ z`FEh{c&4PIEP?O+mF)j9ZxWKLUqYxKiaH)#`72jnfx-=izvJpRp_c!wn8UU@9vbHnO44z6I{#Vs{bMW|J9m$@_c#R^40k-s>j0$Af@^X#UBqUa8&WPtbUg2 zX#M}I5CebK{o!CdzP0Trmk(Ti$L0TQqucLB=gD1!?Vl0ywTpy${f^E5Yd85y`QLud zM-`;{>UWg?QbB+Adj4}e9u1Vo^OB0=Ksp&=2u)E+VoIR*pWY@NP|jR E2UXLG>;M1& literal 0 HcmV?d00001 diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py index 60eee8571c1d..d60a89858512 100644 --- a/python/ray/data/read_api.py 
+++ b/python/ray/data/read_api.py @@ -27,6 +27,7 @@ from ray.data._internal.datasource.delta_sharing_datasource import ( DeltaSharingDatasource, ) +from ray.data._internal.datasource.hudi_datasource import HudiDatasource from ray.data._internal.datasource.iceberg_datasource import IcebergDatasource from ray.data._internal.datasource.image_datasource import ( ImageDatasource, @@ -2312,6 +2313,58 @@ def get_dbutils(): ) +@PublicAPI(stability="alpha") +def read_hudi( + table_uri: str, + *, + storage_options: Optional[Dict[str, str]] = None, + ray_remote_args: Optional[Dict[str, Any]] = None, + concurrency: Optional[int] = None, + override_num_blocks: Optional[int] = None, +) -> Dataset: + """ + Create a :class:`~ray.data.Dataset` from an + `Apache Hudi table <https://hudi.apache.org>`_. + + Examples: + >>> import ray + >>> ds = ray.data.read_hudi( # doctest: +SKIP + ... table_uri="/hudi/trips", + ... ) + + Args: + table_uri: The URI of the Hudi table to read from. Local file paths, S3, and GCS + are supported. + storage_options: Extra options that make sense for a particular storage + connection. This is used to store connection parameters like credentials, + endpoint, etc. See more explanation + `here <https://github.com/apache/hudi-rs?tab=readme-ov-file#work-with-cloud-storage>`_. + ray_remote_args: kwargs passed to :meth:`~ray.remote` in the read tasks. + concurrency: The maximum number of Ray tasks to run concurrently. Set this + to control number of tasks to run concurrently. This doesn't change the + total number of tasks run or the total number of output blocks. By default, + concurrency is dynamically decided based on the available resources. + override_num_blocks: Override the number of output blocks from all read tasks. + By default, the number of output blocks is dynamically decided based on + input data size and available resources. You shouldn't manually set this + value in most cases. + + Returns: + A :class:`~ray.data.Dataset` producing records read from the Hudi table.
+ """ # noqa: E501 + datasource = HudiDatasource( + table_uri=table_uri, + storage_options=storage_options, + ) + + return read_datasource( + datasource=datasource, + ray_remote_args=ray_remote_args, + concurrency=concurrency, + override_num_blocks=override_num_blocks, + ) + + @PublicAPI def from_dask(df: "dask.dataframe.DataFrame") -> MaterializedDataset: """Create a :class:`~ray.data.Dataset` from a diff --git a/python/ray/data/tests/test_hudi.py b/python/ray/data/tests/test_hudi.py new file mode 100644 index 000000000000..af8035cc315f --- /dev/null +++ b/python/ray/data/tests/test_hudi.py @@ -0,0 +1,114 @@ +import os +import zipfile + +import pytest +from packaging.version import parse as parse_version +from pytest_lazyfixture import lazy_fixture + +import ray +from ray._private.utils import _get_pyarrow_version +from ray.data.datasource.path_util import ( + _resolve_paths_and_filesystem, + _unwrap_protocol, +) +from ray.data.tests.conftest import * # noqa +from ray.data.tests.mock_http_server import * # noqa +from ray.tests.conftest import * # noqa + +MIN_PYARROW_VERSION_FOR_HUDI = parse_version("11.0.0") +_VER = _get_pyarrow_version() +PYARROW_VERSION = parse_version(_VER) if _VER else None +PYARROW_VERSION_MEETS_REQUIREMENT = ( + PYARROW_VERSION and PYARROW_VERSION >= MIN_PYARROW_VERSION_FOR_HUDI +) +PYARROW_HUDI_TEST_SKIP_REASON = ( + f"Hudi only supported if pyarrow >= {MIN_PYARROW_VERSION_FOR_HUDI}" +) + + +def _extract_testing_table(fixture_path: str, table_dir: str, target_dir: str) -> str: + with zipfile.ZipFile(fixture_path, "r") as zip_ref: + zip_ref.extractall(target_dir) + return os.path.join(target_dir, table_dir) + + +@pytest.mark.skipif( + not PYARROW_VERSION_MEETS_REQUIREMENT, + reason=PYARROW_HUDI_TEST_SKIP_REASON, +) +@pytest.mark.parametrize( + "fs,data_path", + [ + (None, lazy_fixture("local_path")), + (lazy_fixture("local_fs"), lazy_fixture("local_path")), + ], +) +def test_read_hudi_simple_cow_table(ray_start_regular_shared, fs, 
data_path): + setup_data_path = _unwrap_protocol(data_path) + target_testing_dir = os.path.join(setup_data_path, "test_hudi") + fixture_path, _ = _resolve_paths_and_filesystem( + "example://hudi-tables/0.x_cow_partitioned.zip", fs + ) + target_table_path = _extract_testing_table( + fixture_path[0], "trips_table", target_testing_dir + ) + + ds = ray.data.read_hudi(target_table_path) + + assert ds.schema().names == [ + "_hoodie_commit_time", + "_hoodie_commit_seqno", + "_hoodie_record_key", + "_hoodie_partition_path", + "_hoodie_file_name", + "ts", + "uuid", + "rider", + "driver", + "fare", + "city", + ] + assert ds.count() == 5 + rows = ( + ds.select_columns(["_hoodie_commit_time", "ts", "uuid", "fare"]) + .sort("fare") + .take_all() + ) + assert rows == [ + { + "_hoodie_commit_time": "20240402123035233", + "ts": 1695115999911, + "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa", + "fare": 17.85, + }, + { + "_hoodie_commit_time": "20240402123035233", + "ts": 1695159649087, + "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330", + "fare": 19.1, + }, + { + "_hoodie_commit_time": "20240402123035233", + "ts": 1695091554788, + "uuid": "e96c4396-3fad-413a-a942-4cb36106d721", + "fare": 27.7, + }, + { + "_hoodie_commit_time": "20240402123035233", + "ts": 1695516137016, + "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c", + "fare": 34.15, + }, + { + "_hoodie_commit_time": "20240402144910683", + "ts": 1695046462179, + "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00", + "fare": 339.0, + }, + ] + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(["-v", __file__])) diff --git a/python/requirements/ml/data-test-requirements.txt b/python/requirements/ml/data-test-requirements.txt index d2d435b09d88..9ad22340d031 100644 --- a/python/requirements/ml/data-test-requirements.txt +++ b/python/requirements/ml/data-test-requirements.txt @@ -18,4 +18,5 @@ delta-sharing pytest-mock decord snowflake-connector-python -pyiceberg[sql-sqlite]==0.7.0 \ No newline at end of file 
+pyiceberg[sql-sqlite]==0.7.0 +hudi==0.2.0rc1 diff --git a/python/requirements_compiled.txt b/python/requirements_compiled.txt index 1347afee24c5..45d40d81b7a0 100644 --- a/python/requirements_compiled.txt +++ b/python/requirements_compiled.txt @@ -745,6 +745,8 @@ httpx==0.24.1 # -r /ray/ci/../python/requirements/test-requirements.txt # gradio # gradio-client +hudi==0.2.0rc1 + # via -r /ray/ci/../python/requirements/ml/data-test-requirements.txt huggingface-hub==0.19.4 # via # accelerate