From b622bec794243a7c98508b1e1cb9d30d68b28b22 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 22 May 2023 16:40:39 -0600 Subject: [PATCH] Handle len of -1 in "compresses" buffers from other languages It's unclear why other language implementations will have a compression set for arrow data, then indicate that the length is -1, as a sentinel value that the data is actually _not_ compressed. But since they do, we can handle that case pretty easily. I'm basically just adding a test here from @DrChainsaw's original PR. --- src/table.jl | 5 +++-- test/java_compress_len_neg_one.arrow | Bin 0 -> 6050 bytes test/runtests.jl | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 test/java_compress_len_neg_one.arrow diff --git a/src/table.jl b/src/table.jl index ff44f058..49b61536 100644 --- a/src/table.jl +++ b/src/table.jl @@ -521,11 +521,12 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression) len = unsafe_load(convert(Ptr{Int64}, ptr)) ptr += 8 # skip past uncompressed length as Int64 encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8) - if len === -1 + if len == -1 # len = -1 means data is not compressed + # it's unclear why other language implementations allow this + # but we support to be able to read data produced as such return length(encodedbytes), copy(encodedbytes) end - decodedbytes = Vector{UInt8}(undef, len) if compression.codec === Meta.CompressionTypes.LZ4_FRAME transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes) diff --git a/test/java_compress_len_neg_one.arrow b/test/java_compress_len_neg_one.arrow new file mode 100644 index 0000000000000000000000000000000000000000..1d0f864d6501cba873150bfa1a235df1dc4ba01a GIT binary patch literal 6050 zcmeI$dvKK18OQOnhhtv@on zKkVo<9HTLdW081#dv#=cO}IJIZp=lrEn~85B8ZHUpR&f7 zhSt{2k#LK5W#*-gyHp*CC0qJ6q9_Q(l8#&ZZEEdk*c|cC_X}y`?f!b*$GCUACQ!sU z#eM3Wt6IFP{DVHmD^D2~DB?rM5cP+#Hq;b&889rB@4BY$8rXB_S45jz{5(lM_!x`HX8TFSl z>i@_XAJRo*ar>Wd>u&z8jO_<({b76j^K<<1mQ%kTEwOEGb_%v2vCF*cJR^c;PjWwvpT7=PnMl>M; z#qN4+KolFX37f&!5$}Gu`;%VkoHFjeZbN-PK89zTWJf}G$YIr7zU_tkRrcKP%=z5& z%Eps!-NnSy=wL>v8lLJOm2c=u@zg;jt*?acI-eWcH(C2 z!fxDxTX8$?z+UXf7jY-<#yz+f_u&CNh=VwUM{opR!Q(iJui;5Njc0HS&*C_~ixcR^ z_wgJ~;(2IsrHyYTM|oKp-$0He%0fMV>iVmRxi9q@&+4e>-UR5mq3d=dbZ%dT_J0cR zU}%8*h&fn|8?YCLaRRU4P2`clS7H`cpbfX;Av}v0@K@w!5igjACD@2v_%fcxPw)ox zWp)%Mq8d%;#9cUwAL2C5U|0^%UChNAY=ypK9KrYSD*lCh65=Y9VgDmp?d)d*RZ|Y~3OfBgwFjOiG=^Z3L8Tu3T^Y*o#j7s zd7fUDv(44jZxAj)A+EtB%*1tA0_9o+$|dE_E+{vY3lBoEuGm&ApTsZlYrKJf;$7qr zCl_H9#$X(#q7v0uj(SAVj!xWy{kRv0a1_VTjUVHeIE_E!EhI63I2(q`P=X1#7IU!( zb!bF0w&G^ojyv%Hj^Ifg$8-2OUd12qcf5lvVl*EkQH-&ef^t-$7OSxyTd)JWu^0E? zARfmvIDzN!B7TcM;Z3{;Ev~fjpsyOq+*MT-lbf2#)-{E~tTr@EkCZhv4%9vUQKG4_ zr%4m6nEeW2ox0!dzN=ImobP=c@ZQHc|Nfi)TKYV0UGrtU2ldrT@8j`x zyszk&m*5su(8%JK(WkB-N5##NUcS$KsrUO8djHj>-haL4JbQd+owth*J#P`+hj5$| zZd%2}!3A# zcg^FwP_F$CRNrm&U{rZECPQ~+1J6J$Z8lzd{*U$B^=(XJF8pS8;Wx7jznQt5xbT~q SuEYoZW_A